/******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ // // Created by raver119 on 18.12.17. // #include #include #include #include #include #include #include using namespace simdOps; namespace functions { namespace summarystats { template Y SummaryStatsReduce::execScalar(const int opNum, const bool biasCorrected, void *x, Nd4jLong *xShapeInfo, void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(biasCorrected, x, xShapeInfo, extraParams), SUMMARY_STATS_OPS); } template void SummaryStatsReduce::execScalar(const int opNum, const bool biasCorrected, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(biasCorrected, x, xShapeInfo, extraParams, z, zShapeInfo), SUMMARY_STATS_OPS); } template void SummaryStatsReduce::exec(const int opNum, const bool biasCorrected, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(biasCorrected, x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength), SUMMARY_STATS_OPS); } template template void SummaryStatsReduce::execScalar(const bool biasCorrected, void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo) { auto z = reinterpret_cast(vz); z[0] = execScalar(biasCorrected, vx, xShapeInfo, vextraParams); } template template Z SummaryStatsReduce::execScalar(const bool biasCorrected, void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); SummaryStatsData startingIndex; startingIndex.initialize(); auto length = shape::length(xShapeInfo); uint xShapeInfoCast[MAX_RANK]; const bool canCast = sd::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast); for (Nd4jLong i = 0; i < length; i++) { auto xOffset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCast); SummaryStatsData curr; curr.initWithValue(x[xOffset]); startingIndex = update(startingIndex, curr, extraParams); } return OpType::getValue(biasCorrected, startingIndex); } template template void SummaryStatsReduce::exec(const bool biasCorrected, void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength) { auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto resultLength = shape::length(zShapeInfo); if(sd::ArrayOptions::arrayType(xShapeInfo) == sd::ArrayType::EMPTY) { if(sd::ArrayOptions::arrayType(zShapeInfo) == sd::ArrayType::EMPTY) return; SummaryStatsData comp; comp.initWithValue(x[0]); for (Nd4jLong i = 0; i < resultLength; i++) z[i] = OpType::getValue(biasCorrected, comp); return; } if (shape::isScalar(zShapeInfo)) { z[0] = execScalar(biasCorrected, x, xShapeInfo, extraParams); return; } //no-op if (dimensionLength < 1) return; auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); //pre squeezed: this is for keeping the pointer to the original //shape information for tad offset //the squeezed information doesn't render the right strides for //tad offset if (resultLength == 1 || dimensionLength == shape::rank(xShapeInfo) || tadPack.numberOfTads() == 1) { z[0] = execScalar(biasCorrected, x, xShapeInfo, extraParams); return; } auto tadShapeShapeInfo = tadPack.primaryShapeInfo(); auto tadLength = shape::length(tadPack.primaryShapeInfo()); auto tadEWS = shape::elementWiseStride(tadPack.primaryShapeInfo()); auto tadOrder = shape::order(tadPack.primaryShapeInfo()); uint tadShapeShapeInfoCast[MAX_RANK]; const bool canCast = tadEWS == 1 && tadOrder == 'c' ? false : sd::DataTypeUtils::castShapeInfo(tadShapeShapeInfo, tadShapeShapeInfoCast); auto func = PRAGMA_THREADS_FOR { for (auto r = start; r < stop; r++) { auto tadOffsetForBlock = tadPack.primaryOffsets()[r]; auto tx = x + tadOffsetForBlock; SummaryStatsData comp; comp.initWithValue(tx[0]); if (tadEWS == 1 && tadOrder == 'c') { for (Nd4jLong i = 1; i < tadLength; i++) { SummaryStatsData indexVal2; indexVal2.initWithValue(tx[i]); comp = update(comp, OpType::op(indexVal2, extraParams), extraParams); } } else { for (Nd4jLong i = 1; i < tadLength; i++) { auto xOffset = shape::indexOffset(i, tadShapeShapeInfo, tadShapeShapeInfoCast, canCast); SummaryStatsData indexVal2; indexVal2.initWithValue(tx[xOffset]); comp = update(comp, OpType::op(indexVal2, extraParams), extraParams); } } z[r] = OpType::getValue(biasCorrected, comp); } }; sd::Threads::parallel_tad(func, 0, resultLength, 1); } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT SummaryStatsReduce, , LIBND4J_TYPES, FLOAT_TYPES); } }