/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 * Copyright (c) 2019 Konduit K.K.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// @author GS
//

// NOTE(review): the four #include directives below carry no target, and many template
// parameter / argument lists in this file look truncated (e.g. `template static void ...`,
// `std::vector> outputs`, `std::map> idxs`, `indices->e(0)`). Presumably angle-bracketed
// text was stripped from this copy - restore it from the upstream file before compiling.
#include
#include
#include
#include

namespace nd4j {
namespace ops {
namespace helpers {

    // segment max
    //
    // Per-segment maximum over dimension 0. A segment is a run of consecutive equal ids in
    // `indices`; the result for a run is stored in `output` at position <id>.
    //   input   - values to reduce
    //   indices - one segment id per position along dimension 0 of `input`
    //   output  - receives one result per segment id that is visited
    // Vector input is reduced element by element; any other input is split into sub-arrays
    // with allTensorsAlongDimension() over every dimension except 0 (presumably one sub-array
    // per index along dimension 0 - TODO confirm) and reduced sub-array by sub-array.
    template static void segmentMaxFunctor_(NDArray* input, NDArray* indices, NDArray* output) {
        //int numClasses = output->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        Nd4jLong idx = indices->e(0);
        if (input->isVector()) {
            T val = input->e(0);

            // NOTE(review): `output` is only written inside this loop, which starts at e = 1.
            // If the first run consists of a single element (or indices has length 1) the
            // value for the first id is never stored by this function - confirm whether
            // callers rely on that.
            for (Nd4jLong e = 1; e < indices->lengthOf(); e++) {
                if (idx == indices->e(e)) {
                    // max
                    val = nd4j::math::nd4j_max(val, input->t(e));
                }
                else {
                    // a new run starts: restart the running value from the current element
                    idx = indices->e(e);
                    val = input->t(e);
                }
                // store the running value for the current id on every step
                output->t(idx) = val;
            }
        }
        else {
            std::vector restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});
            auto listOfTensors = input->allTensorsAlongDimension(restDims);
            auto listOfOutTensors = output->allTensorsAlongDimension(restDims);

            auto numOfClasses = output->sizeAt(0); // number of classes
            // NOTE(review): `outputs` is not referenced again in this function.
            std::vector> outputs(numOfClasses);
            auto maxT = listOfOutTensors.at(idx);

            //int pos = 0;
            // seed the first segment's output sub-array with the first input sub-array
            maxT->assign(listOfTensors.at(0));

            for (Nd4jLong i = 1; i < indices->lengthOf(); i++) {
                if (indices->e(i) == idx) {
                    // same run: element-wise max into the current output sub-array
                    for (Nd4jLong e = 0; e < maxT->lengthOf(); e++) {
                        maxT->t(e) = nd4j::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e));
                    }
                }
                else {
                    // new run: switch to its output sub-array and seed it
                    idx = indices->e(i);
                    maxT = listOfOutTensors.at(idx);
                    maxT->assign(listOfTensors.at(i));
                }
            }
        }
    }

    // segment min
    //
    // Per-segment minimum over dimension 0; same structure as segmentMaxFunctor_ with
    // nd4j_min in place of nd4j_max. The sub-array branch goes through e()/p() accessors
    // where the max version uses t().
    template static void segmentMinFunctor_(NDArray* input, NDArray* indices, NDArray* output) {
        //int numClasses = output->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        Nd4jLong idx = indices->e(0);
        if (input->isVector()) {
            T val = input->e(0);

            // NOTE(review): as in segmentMaxFunctor_, the write to `output` only happens for
            // e >= 1, so a single-element first run is never stored.
            for (int e = 1; e < indices->lengthOf(); e++) {
                if (idx == indices->e(e)) {
                    // min
                    val = nd4j::math::nd4j_min(val, input->t(e));
                }
                else {
                    idx = indices->e(e);
                    val = input->t(e);
                }
                output->t(idx) = val;
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            int numOfClasses = output->sizeAt(0); // number of classes
            // NOTE(review): `outputs` and `pos` are not referenced again in this function.
            std::vector> outputs(numOfClasses);
            auto minT = listOfOutTensors.at(idx);

            int pos = 0;
            // seed the first segment's output sub-array with the first input sub-array
            minT->assign(listOfTensors.at(0));

            for (Nd4jLong i = 1; i < indices->lengthOf(); i++) {
                if (indices->e(i) == idx) {
                    // same run: element-wise min into the current output sub-array
                    for (int e = 0; e < minT->lengthOf(); e++) {
                        minT->p(e, nd4j::math::nd4j_min(minT->e(e), listOfTensors.at(i)->e(e)));
                    }
                }
                else {
                    idx = indices->e(i);
                    minT = listOfOutTensors.at(idx);
                    minT->assign(listOfTensors.at(i));
                }
            }
        }
    }

    // segment mean
    //
    // Per-segment arithmetic mean over dimension 0 for runs of consecutive equal ids.
    // Keeps a running sum and element count per run and stores sum / count in `output`
    // at the run's id.
    template static void segmentMeanFunctor_(NDArray* input, NDArray* indices, NDArray* output) {
        // NOTE(review): `numClasses` is not referenced again in this function.
        int numClasses = output->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        int idx = indices->e(0);
        if (input->isVector()) {
            T val = T(0.f);
            int count = 0;

            // Loop starts at 0 with an empty accumulator, so the first element is counted
            // by the "same id" branch.
            for (int e = 0; e < indices->lengthOf(); e++) {
                if (idx == indices->e(e)) {
                    // mean
                    val += input->e(e);
                    count++;
                }
                else {
                    // finish the previous run, then restart sum/count for the new id
                    output->p(idx, val / count);
                    idx = indices->e(e);
                    val = input->e(e);
                    count = 1;
                }
                // store the mean of the current run so far
                output->p(idx, val / count);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            auto listOfTensors = input->allTensorsAlongDimension(restDims);
            auto listOfOutTensors = output->allTensorsAlongDimension(restDims);

            int numOfClasses = output->sizeAt(0); // number of classes
            // NOTE(review): `outputs` is not referenced again in this function.
            std::vector> outputs(numOfClasses);
            auto meanT = listOfOutTensors.at(idx);
            int count = 1;
            // meanV is a separate accumulator (a dup of the output sub-array) holding the running sum
            auto meanV = meanT->dup();
            meanV.assign(listOfTensors.at(0));

            // NOTE(review): the result is only written inside this loop (i >= 1); when
            // indices has length 1 nothing is stored in `output`.
            for (int i = 1; i < indices->lengthOf(); i++) {
                if (indices->e(i) == idx) {
                    // same run: add the current input sub-array into the running sum.
                    // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR.
                    auto func = PRAGMA_THREADS_FOR {
                        for (auto e = start; e < stop; e += increment) {
                            meanV.p(e, meanV.e(e) + listOfTensors.at(i)->e(e));
                        }
                    };
                    samediff::Threads::parallel_for(func, 0, meanT->lengthOf());

                    count++;
                }
                else {
                    //meanT->assign(meanV);
                    // finish the previous run: sum / count -> its output sub-array
                    meanV.applyScalar(scalar::Divide, count, *meanT);
                    idx = indices->e(i);
                    meanT = listOfOutTensors.at(idx);
                    meanV.assign(listOfTensors.at(i));
                    count = 1;
                }
                // store the mean of the current run so far
                meanV.applyScalar(scalar::Divide, count, *meanT);
            }
        }
    }

    // segment sum
    //
    // Per-segment sum over dimension 0 for runs of consecutive equal ids; the running sum
    // is stored in `output` at the run's id.
    template static void segmentSumFunctor_(NDArray* input, NDArray* indices, NDArray* output) {
        // NOTE(review): `numClasses` is not referenced again in this function.
        int numClasses = output->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        int idx = indices->e(0);
        if (input->isVector()) {
            T val = T(0.f);
            // NOTE(review): `count` is not referenced again in this function.
            int count = 0;
            for (int e = 0; e < indices->lengthOf(); e++) {
                if (idx == indices->e(e)) {
                    // sum
                    val += input->t(e);
                }
                else {
                    idx = indices->e(e);
                    val = input->t(e);
                }
                output->p(idx, val);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            auto listOfTensors = input->allTensorsAlongDimension(restDims);
            auto listOfOutTensors = output->allTensorsAlongDimension(restDims);

            int numOfClasses = output->sizeAt(0); // number of classes
            // NOTE(review): `outputs` is not referenced again in this function.
            std::vector> outputs(numOfClasses);
            auto sumT = listOfOutTensors.at(idx);

            // NOTE(review): unlike the other sorted functors this loop starts at i = 0 and
            // the first output sub-array is not seeded from the input, so the first run is
            // added onto whatever `output` already holds - presumably the caller provides a
            // zero-filled output; confirm. Later runs are seeded with assign().
            for (int i = 0; i < indices->lengthOf(); i++) {
                if (indices->e(i) == idx) {
                    // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR
                    auto func = PRAGMA_THREADS_FOR {
                        for (auto e = start; e < stop; e += increment) {
                            sumT->p(e, sumT->e(e) + listOfTensors.at(i)->e(e));
                        }
                    };
                    samediff::Threads::parallel_for(func, 0, sumT->lengthOf());
                }
                else {
                    idx = indices->e(i);
                    sumT = listOfOutTensors.at(idx);
                    sumT->assign(listOfTensors.at(i));
                }
            }
        }
    }

    // segment prod
    //
    // Per-segment product over dimension 0 for runs of consecutive equal ids. `output` is
    // filled with 1 first, so ids that are never written keep the value 1.
    template static void segmentProdFunctor_(NDArray* input, NDArray* indices, NDArray* output) {
        //int numClasses = output->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        int idx = indices->e(0);
        output->assign(1.f);
        if (input->isVector()) {
            T val = input->e(0);
            // NOTE(review): `count` is not referenced again in this function.
            int count = 0;

            // NOTE(review): `output` is only written for e >= 1, so when the first run has a
            // single element its slot keeps the 1 assigned above instead of input[0].
            for (int e = 1; e < indices->lengthOf(); e++) {
                if (idx == indices->e(e)) {
                    // product
                    val *= input->e(e);
                }
                else {
                    idx = indices->e(e);
                    val = input->e(e);
                }
                output->p(idx, val);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            auto listOfTensors = input->allTensorsAlongDimension(restDims);
            auto listOfOutTensors = output->allTensorsAlongDimension(restDims);

            // NOTE(review): `numOfClasses` is not referenced again in this function.
            int numOfClasses = output->sizeAt(0); // number of classes
            // despite the name, sumT holds the running product of the current run
            auto sumT = listOfOutTensors.at(idx);
            sumT->assign(listOfTensors.at(0));
            for (int i = 1; i < indices->lengthOf(); i++) {
                if (indices->e(i) == idx) {
                    // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR
                    auto func = PRAGMA_THREADS_FOR {
                        for (auto e = start; e < stop; e += increment) {
                            sumT->p(e, sumT->e(e) * listOfTensors.at(i)->e(e));
                        }
                    };
                    samediff::Threads::parallel_for(func, 0, sumT->lengthOf());
                }
                else {
                    idx = indices->e(i);
                    sumT = listOfOutTensors.at(idx);
                    sumT->assign(listOfTensors.at(i));
                }
            }
        }
    }

//    template
//    static bool segmentIndicesValidate_(NDArray* indices, NDArray& aexpected, NDArray& anOutput) {
//    }

    // Public entry points for the sorted segment ops. Each one forwards to the matching
    // templated implementation through BUILD_SINGLE_SELECTOR, keyed on input->dataType()
    // over LIBND4J_TYPES. `context` is not used by these wrappers.
    void segmentMaxFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), segmentMaxFunctor_, (input, indices, output), LIBND4J_TYPES);
    }

    void segmentMinFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), segmentMinFunctor_, (input, indices, output), LIBND4J_TYPES);
    }

    void segmentMeanFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), segmentMeanFunctor_, (input, indices, output), LIBND4J_TYPES);
    }

    void segmentSumFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), segmentSumFunctor_, (input, indices, output), LIBND4J_TYPES);
    }

    void segmentProdFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), segmentProdFunctor_, (input, indices, output), LIBND4J_TYPES);
    }

    // Checks that `indices` never decreases from one element to the next.
    //   output - overwritten with each inspected element; when false is returned it holds
    //            the first element that is smaller than its predecessor
    // Returns true when the loop finishes without finding such an element.
    // `context` and `expected` are not used.
    bool segmentIndicesValidate(nd4j::LaunchContext * context, NDArray* indices, NDArray& expected, NDArray& output) {
        auto val = indices->e(0);
        for (int e = 1; e < indices->lengthOf(); e++) {
            output = indices->e(e);
            if (val.e(0) > output.e(0))
                return false;
            val = indices->e(e);
        }

        return true;
    }

    // Type registrations for the templated sorted functors (presumably explicit
    // instantiations produced by the BUILD_SINGLE_TEMPLATE macro).
    //BUILD_SINGLE_TEMPLATE(template bool segmentIndicesValidate_, (NDArray*, NDArray&, NDArray&), LIBND4J_TYPES);
    BUILD_SINGLE_TEMPLATE(template void segmentProdFunctor_, (NDArray* input, NDArray* indices, NDArray* output), LIBND4J_TYPES);
    BUILD_SINGLE_TEMPLATE(template void segmentSumFunctor_, (NDArray* input, NDArray* indices, NDArray* output), LIBND4J_TYPES);
    BUILD_SINGLE_TEMPLATE(template void segmentMeanFunctor_, (NDArray* input, NDArray* indices, NDArray* output), LIBND4J_TYPES);
    BUILD_SINGLE_TEMPLATE(template void segmentMinFunctor_, (NDArray* input, NDArray* indices, NDArray* output), LIBND4J_TYPES);
    BUILD_SINGLE_TEMPLATE(template void segmentMaxFunctor_, (NDArray* input, NDArray* indices, NDArray* output), LIBND4J_TYPES);
    // -------------------------------------------------------------------------------------------------------------- //
    // Unsorted segment ops
    // -------------------------------------------------------------------------------------------------------------- //

    // Checks that the largest id in `indices` is below `expected`.
    //   expected - exclusive upper bound for the ids
    //   output   - set to `expected` on success; on failure set to the FIRST element of
    //              indices (not the offending maximum)
    // NOTE(review): presumably the offending value was meant to be reported on failure;
    // negative ids are not checked here. `context` is not used.
    bool unsortedSegmentIndicesValidate(nd4j::LaunchContext * context, NDArray* indices, Nd4jLong expected, Nd4jLong& output) {
        Nd4jLong val = indices->e(0);

        Nd4jLong maxInd = indices->argMax();
        if (indices->e(maxInd) >= expected) {
            output = val;
            return false;
        }
        output = expected;
        return true;
    }

    // unsorted segment max
    //
    // Groups the positions of `indices` by id (ids need not be contiguous), then stores the
    // maximum of every group in `output` at that id. `output` is first filled with the
    // negated DataTypeUtils::max() value, which is what ids without any element keep.
    // `numOfClasses` is not used in the body.
    // NOTE(review): negating the max value looks unsafe if T can be an unsigned type -
    // the contents of NUMERIC_TYPES are not visible here; confirm.
    template static void unsortedSegmentMaxFunctor_(NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {

        // if input is a vector: (as if in doc sample)
        //int idx = static_cast((*indices)(0.));
        // id -> positions in `indices` carrying that id, in order of appearance
        std::map> idxs;//(indices->lengthOf());
        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
            idxs[indices->e(e)].push_back(e);

        //std::sort(idxs.begin(), idxs.end());

        if (input->isVector()) { // 1D case
            T maxVal = DataTypeUtils::max();
            output->assign(-maxVal);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                // start from the group's first element, then fold in the rest
                T val = input->e(fi->second.at(0));
                for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
                    val = nd4j::math::nd4j_max(val, input->e(fi->second.at(idx)));
                }
                output->p(fi->first, val);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            T maxVal = DataTypeUtils::max();
            output->assign(-maxVal);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                auto outputT = listOfOutTensors.at(fi->first);
                // seed with the group's first sub-array, then element-wise max with the rest
                outputT->assign(listOfTensors.at(fi->second.at(0)));
                for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
                    auto maxT = listOfTensors.at(fi->second.at(idx));
                    for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) {
                        T val = nd4j::math::nd4j_max(maxT->e(e), outputT->e(e));

                        outputT->p(e, val);
                    }
                }
            }
        }
    }

    // Dispatches on input->dataType() over NUMERIC_TYPES; `context` is not used.
    void unsortedSegmentMaxFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), unsortedSegmentMaxFunctor_, (input, indices, numOfClasses, output), NUMERIC_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template void unsortedSegmentMaxFunctor_, (NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output), NUMERIC_TYPES);

    // unsorted segment min
    //
    // Same grouping as unsortedSegmentMaxFunctor_, reducing with nd4j_min. `output` is first
    // filled with DataTypeUtils::max(), which is what ids without any element keep.
    // `numOfClasses` is not used in the body.
    template static void unsortedSegmentMinFunctor_(NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        // if input is a vector: (as if in doc sample)
        //int idx = static_cast((*indices)(0.));
        // id -> positions in `indices` carrying that id
        std::map> idxs;//(indices->lengthOf());

        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
            idxs[indices->e(e)].push_back(e);

        //std::sort(idxs.begin(), idxs.end());

        if (input->isVector()) { // 1D case
            T maxVal = DataTypeUtils::max();
            output->assign(maxVal);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                T val = input->t(fi->second.at(0));

                for (size_t idx = 1; idx < fi->second.size(); ++idx) {
                    val = nd4j::math::nd4j_min(val, input->t(fi->second.at(idx)));
                }
                output->t(fi->first) = val;
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            T maxVal = DataTypeUtils::max();
            output->assign(maxVal);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                auto outputT = listOfOutTensors.at(fi->first);
                // seed with the group's first sub-array, then element-wise min with the rest
                outputT->assign(listOfTensors.at(fi->second.at(0)));
                for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
                    auto minT = listOfTensors.at(fi->second.at(idx));

                    for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) {
                        outputT->t(e) = nd4j::math::nd4j_min(minT->t(e), outputT->t(e));
                    }
                }
                //outputT->assign(maxT);
            }
        }
    }

    // Dispatches on input->dataType() over NUMERIC_TYPES; `context` is not used.
    void unsortedSegmentMinFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), unsortedSegmentMinFunctor_, (input, indices, numOfClasses, output),
                              NUMERIC_TYPES);
    }

    BUILD_SINGLE_TEMPLATE(template void unsortedSegmentMinFunctor_, (NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output), NUMERIC_TYPES);

    // unsorted segment mean
    //
    // Groups positions by id and stores the arithmetic mean of every group in `output` at
    // that id. The vector branch accumulates in a double. `output` is not pre-filled here,
    // so ids that do not occur in `indices` are left untouched. `context` and
    // `numOfClasses` are not used.
    void unsortedSegmentMeanFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        // id -> positions in `indices` carrying that id
        std::map> idxs;//(indices->lengthOf());
        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
            idxs[indices->e(e)].push_back(e);

        //std::sort(idxs.begin(), idxs.end());

        if (input->isVector()) { // 1D case

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                double sumValue = input->e(fi->second.at(0));
                // NOTE(review): int bound compared against a size_t counter below
                int loop_size = fi->second.size();

                // FIXME: parallelism here?
                for (size_t idx = 1; idx < loop_size; ++idx) {
                    sumValue += input->e(fi->second.at(idx));
                }

                output->p(fi->first, sumValue / fi->second.size());
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            // FIXME: parallelism here?
            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                auto outputT = listOfOutTensors.at(fi->first);
                // seed with the group's first sub-array, add the rest, then divide by the group size
                outputT->assign(listOfTensors.at(fi->second.at(0)));
                Nd4jLong loopSize = fi->second.size();

                for (Nd4jLong idx = 1; idx < loopSize; ++idx) {
                    auto current = listOfTensors.at(fi->second.at(idx));
                    *outputT += *current;
                }
                (*outputT) /= double(fi->second.size());
            }
        }
    }

    // unsorted segment sum
    //
    // Groups positions by id and stores the sum of every group in `output` at that id.
    // The vector branch accumulates in a double. `output` is not pre-filled here, so ids
    // that do not occur in `indices` are left untouched. `context` and `numOfClasses`
    // are not used.
    void unsortedSegmentSumFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        // id -> positions in `indices` carrying that id
        std::map> idxs;//(indices->lengthOf());
        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
            idxs[indices->e(e)].push_back(e);

        if (input->isVector()) { // 1D case

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                double sumValue = input->e(fi->second.at(0));
                Nd4jLong loop_size = fi->second.size();

                // FIXME: parallelism here?
                for (Nd4jLong idx = 1; idx < loop_size; ++idx) {
                    sumValue += input->e(fi->second.at(idx));
                }
                output->p(fi->first, sumValue);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                auto outputT = listOfOutTensors.at(fi->first);
                // seed with the group's first sub-array, then add the rest
                outputT->assign(listOfTensors.at(fi->second.at(0)));
                Nd4jLong loop_size = fi->second.size();

                // FIXME: parallelism here?
                for (Nd4jLong idx = 1; idx < loop_size; ++idx) {
                    auto current = listOfTensors.at(fi->second.at(idx));
                    *(outputT) += *current;
                }
                //outputT->assign(maxT);
            }
        }
    }

    // unsorted segment prod
    //
    // Groups positions by id and stores the product of every group in `output` at that id.
    // `output` is filled with 1 first, so ids that do not occur in `indices` end up as 1.
    // `numOfClasses` is not used in the body.
    template void unsortedSegmentProdFunctor_(NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        // id -> positions in `indices` carrying that id
        std::map> idxs;//(indices->lengthOf());
        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
            idxs[indices->e(e)].push_back(e);

        //std::sort(idxs.begin(), idxs.end());

        output->assign(1.f);

        if (input->isVector()) { // 1D case
            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                T prodValue = input->e(fi->second.at(0));
                for (size_t idx = 1; idx < fi->second.size(); ++idx) {
                    prodValue *= input->e(fi->second.at(idx));
                }
                output->p(fi->first, prodValue);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                auto outputT = listOfOutTensors.at(fi->first);
                // seed with the group's first sub-array, then multiply by the rest
                outputT->assign(listOfTensors.at(fi->second.at(0)));
                for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
                    auto current = listOfTensors.at(fi->second.at(idx));

                    *outputT *= *current;
                }
            }
        }
    }

    // Dispatches on input->dataType() over NUMERIC_TYPES; `context` is not used.
    void unsortedSegmentProdFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), unsortedSegmentProdFunctor_, (input, indices, numOfClasses, output), NUMERIC_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template void unsortedSegmentProdFunctor_, (NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output), NUMERIC_TYPES);

    // unsorted segment sqrt_n
    //
    // Groups positions by id and stores, for every group, sum / sqrt(group size) in
    // `output` at that id. `output` is not pre-filled here, so ids that do not occur in
    // `indices` are left untouched. `context` and `numOfClasses` are not used.
    void unsortedSegmentSqrtNFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
        // id -> positions in `indices` carrying that id
        std::map> idxs;//(indices->lengthOf());
        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
            idxs[indices->e(e)].push_back(e);

        //std::sort(idxs.begin(), idxs.end());

        if (input->isVector()) { // 1D case
            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                double sumValue = input->e(fi->second.at(0));
                for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
                    sumValue += input->e(fi->second.at(idx));
                }
                output->p(fi->first, sumValue / nd4j::math::nd4j_sqrt(fi->second.size()));
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
                auto outputT = listOfOutTensors.at(fi->first);
                // seed with the group's first sub-array, add the rest, then scale by 1/sqrt(n)
                outputT->assign(listOfTensors.at(fi->second.at(0)));
                for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
                    auto current = listOfTensors.at(fi->second.at(idx));
                    *outputT += *current;
                }
                //outputT->assign(maxT);
                (*outputT) /= nd4j::math::nd4j_sqrt(fi->second.size());
            }
        }
    }

    // -------------------------------------------------------------------------------------------------------------- //
    // Backpropagate ops helpers
    // -------------------------------------------------------------------------------------------------------------- //
    // Sorted backpropagate ops
    //
    // segment max
    //
    // Gradient of the sorted segment max. Recomputes the forward result into a copy of
    // `gradOut`, then, for every input element whose value is within 1e-6 of its segment's
    // maximum, copies that segment's incoming gradient into `output` at the element's
    // position. Elements that do not match are not written.
    // NOTE(review): `output` is not reset in this function, so non-matching positions keep
    // whatever the caller put there - presumably zeros; confirm. `context` is not used.
    // Always returns ND4J_STATUS_OK.
    template int segmentMaxFunctorBP_(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
        //int numOfClasses = gradOut->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        // tempRes only borrows gradOut's shape; its contents are overwritten by the forward pass
        auto tempRes = gradOut->dup();
        segmentMaxFunctor_(input, indices, &tempRes);
        if (input->isVector()) {
            Nd4jLong loop_size = input->lengthOf();

            // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR
            auto func = PRAGMA_THREADS_FOR {
                for (auto e = start; e < stop; e += increment) {
                    auto classNum = indices->e(e);
                    if (nd4j::math::nd4j_abs(tempRes.e(classNum) - input->e(e)) <= T(1.e-6))
                        output->p(e, gradOut->e(classNum));
                }
            };
            samediff::Threads::parallel_for(func, 0, loop_size);
        }
        else {
            std::vector restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfBPTensors = tempRes.allTensorsAlongDimension(restDims);
            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //int numOfClasses = tempRes.sizeAt(0); // number of classes
            //std::vector> outputs(numOfClasses);

            auto func = PRAGMA_THREADS_FOR {
                for (auto i = start; i < stop; i += increment) {
                    auto classNum = indices->e(i);
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    // route the gradient only to elements that equal the segment maximum
                    for (uint64_t e = 0; e < current->lengthOf(); e++) {
                        if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->e(e) - current->e(e)) <= T(1.e-6))
                            currentOut->p(e, currentGradOut->e(e));
                    }
                }
            };

            samediff::Threads::parallel_tad(func, 0, indices->lengthOf());
        }

        return ND4J_STATUS_OK;
    }

    // Dispatches on output->dataType() over NUMERIC_TYPES and returns the implementation's status.
    int segmentMaxFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
        BUILD_SINGLE_SELECTOR(output->dataType(), return segmentMaxFunctorBP_, (context, input, indices, gradOut, output), NUMERIC_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template int segmentMaxFunctorBP_, (nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output), NUMERIC_TYPES);

    // segment min
    //
    // Gradient of the sorted segment min. Recomputes the forward minimum into a copy of
    // `gradOut` and copies the segment's incoming gradient to every input position whose
    // value is within 1e-5 of that minimum.
    // NOTE(review): the tolerance (1e-5, strict <) differs from segmentMaxFunctorBP_
    // (1e-6, <=), and only the sub-array branch zeroes `output` before writing - confirm
    // both are intended. `pos` is not referenced again. Always returns ND4J_STATUS_OK.
    int segmentMinFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
        // tempRes only borrows gradOut's shape; its contents are overwritten by the forward pass
        NDArray tempRes = gradOut->dup();
        segmentMinFunctor(context, input, indices, &tempRes);
        if (input->isVector()) {
            // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR
            auto func = PRAGMA_THREADS_FOR {
                for (auto e = start; e < stop; e += increment) {
                    auto classNum = indices->e(e);
                    if (nd4j::math::nd4j_abs(tempRes.e(classNum) - input->e(e)) < 1.e-5)
                        output->p(e, gradOut->e(classNum));
                }
            };
            samediff::Threads::parallel_for(func, 0, input->lengthOf());
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfBPTensors = tempRes.allTensorsAlongDimension(restDims);
            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //int numOfClasses = tempRes.sizeAt(0); // number of classes
            //std::vector> outputs(numOfClasses);
            // non-matching positions must read as zero gradient
            output->assign(0.);
            int pos = 0;

            auto func = PRAGMA_THREADS_FOR {
                for (auto i = start; i < stop; i += increment) {
                    auto classNum = indices->e(i);
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    for (int e = 0; e < current->lengthOf(); e++) {
                        if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->e(e) - current->e(e)) < 1.e-5)
                            currentOut->p(e, currentGradOut->e(e));
                    }
                }
            };

            samediff::Threads::parallel_tad(func, 0, indices->lengthOf());
        }
        return ND4J_STATUS_OK;
    }

    // segment mean
    //
    // Gradient of the sorted segment mean: every input position receives its segment's
    // incoming gradient divided by the number of positions carrying that segment id.
    // `context` is not used. Always returns ND4J_STATUS_OK.
    int segmentMeanFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
        int numClasses = output->sizeAt(0);
        // id -> number of positions carrying that id; entries 0..numClasses-1 start at 0
        std::map classCount;//(numClasses);

        for (Nd4jLong count = 0; count < numClasses; ++count) {
            classCount[count] = 0;
        }

        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
            classCount[indices->e(e)] ++;
        }

        // if input is a vector: (as if in doc sample)
        if (input->isVector()) {
            for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
                Nd4jLong classNum = indices->e(e);
                output->p(e, gradOut->e(classNum) / classCount[classNum]);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
            ;

            // NOTE(review): `pos` is not referenced again; the `;` above is an empty statement.
            int pos = 0;
            //auto func = [&](uint64_t thread_id, uint64_t start, uint64_t stop, uint64_t increment) -> void {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    // `current` is only used for its length
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    for (int e = 0; e < current->lengthOf(); e++) {
                        currentOut->p(e, currentGradOut->e(e) / classCount.at(classNum));
                    }
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }
        return ND4J_STATUS_OK;
    }

    // segment sum
    //
    // Gradient of the sorted segment sum: every input position receives a copy of its
    // segment's incoming gradient. `context` is not used. Always returns Status::OK().
    int segmentSumFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
//        int numClasses = output->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        // NOTE(review): `idx` is not referenced again in this function.
        Nd4jLong idx = indices->e(0);
        if (input->isVector()) {
            for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
                Nd4jLong classNum = indices->e(e);
                output->p(e, gradOut->e(classNum));
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //auto func = PRAGMA_THREADS_FOR {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    // NOTE(review): `current` is fetched but not used.
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    currentOut->assign(currentGradOut);
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }
        return Status::OK();
    }

    // segment prod
    //
    // Gradient of the sorted segment product: every input position receives
    // gradOut[segment] * product[segment] / input[position], where the product is
    // recomputed with segmentProdFunctor into a copy of `gradOut`.
    // NOTE(review): the division by the input value is not guarded, so a zero input
    // element is not special-cased. Always returns ND4J_STATUS_OK.
    int segmentProdFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
        // tempRes only borrows gradOut's shape; its contents are overwritten by the forward pass
        auto tempRes = gradOut->dup();
        segmentProdFunctor(context, input, indices, &tempRes);
        if (input->isVector()) {
            for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
                Nd4jLong classNum = indices->e(e);
                output->p(e, gradOut->e(classNum) * tempRes.e(classNum)/ input->e(e));
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfBPTensors = tempRes.allTensorsAlongDimension(restDims);
            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //int numOfClasses = tempRes.sizeAt(0); // number of classes
            //std::vector> outputs(numOfClasses);

            //auto func = PRAGMA_THREADS_FOR {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);
                    auto currentFFOut = listOfBPTensors.at(classNum);

                    // forward product * incoming gradient / own value, element-wise
                    currentOut->assign((*currentFFOut) * (*currentGradOut) / (*current));
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }

        return ND4J_STATUS_OK;
    }

    // -------------------------------------------------------------------------------------------------------------- //
    // Unsorted backpropagate segment ops
    // -------------------------------------------------------------------------------------------------------------- //

    // unsorted segment max
    //
    // Gradient of the unsorted segment max. Recomputes the forward maximum with
    // unsortedSegmentMaxFunctor into a copy of `gradOut` and copies the segment's incoming
    // gradient to every input position whose value is within 1e-5 of that maximum.
    // Non-matching positions are not written (`output` is not reset here).
    // Always returns ND4J_STATUS_OK.
    template static int unsortedSegmentMaxFunctorBP_(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
//        int numOfClasses = gradOut->sizeAt(0);
        // if input is a vector: (as if in doc sample)
        // tempRes only borrows gradOut's shape; its contents are overwritten by the forward pass
        auto tempRes = gradOut->dup();
        unsortedSegmentMaxFunctor(context, input, indices, numOfClasses, &tempRes);
        if (input->isVector()) {

            for (Nd4jLong e = 0; e < input->lengthOf(); ++e) {
                Nd4jLong classNum = indices->e(e);
                if (nd4j::math::nd4j_abs(tempRes.e(classNum) - input->e(e)) < 1.e-5)
                    output->p(e, gradOut->e(classNum));
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfBPTensors = tempRes.allTensorsAlongDimension(restDims);
            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            for (int i = 0; i < indices->lengthOf(); i++) {
                Nd4jLong classNum = indices->e(i);
                NDArray* current = listOfTensors.at(i);
                NDArray* currentOut = listOfOutTensors.at(i);
                NDArray* currentGradOut = listOfGradOuts.at(classNum);
                for (int e = 0; e < current->lengthOf(); e++) {
                    if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->e(e) - current->e(e)) < 1.e-5)
                        currentOut->p(e, currentGradOut->e(e));
                }
            }
        }

        return ND4J_STATUS_OK;
    }

    // Dispatches on output->dataType() over NUMERIC_TYPES and returns the implementation's status.
    int unsortedSegmentMaxFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
        BUILD_SINGLE_SELECTOR(output->dataType(), return unsortedSegmentMaxFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), NUMERIC_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template int unsortedSegmentMaxFunctorBP_, (nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output), NUMERIC_TYPES);

    // unsorted segment min
    //
    // Gradient of the unsorted segment min. Recomputes the forward minimum with
    // unsortedSegmentMinFunctor into a copy of `gradOut` and copies the segment's incoming
    // gradient to every input position whose value is within 1e-6 of that minimum.
    // Non-matching positions are not written (`output` is not reset here).
    // NOTE(review): tolerance here is 1e-6 while unsortedSegmentMaxFunctorBP_ uses 1e-5.
    // Always returns ND4J_STATUS_OK.
    template static int unsortedSegmentMinFunctorBP_(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
        // tempRes only borrows gradOut's shape; its contents are overwritten by the forward pass
        auto tempRes = gradOut->dup();
        unsortedSegmentMinFunctor(context, input, indices, numOfClasses, &tempRes);
        if (input->isVector()) {

            // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR
            auto func = PRAGMA_THREADS_FOR {
                for (auto e = start; e < stop; e += increment) {
                    auto classNum = indices->e(e);
                    if (nd4j::math::nd4j_abs(tempRes.t(classNum) - input->t(e)) < 1.e-6)
                        output->t(e) = gradOut->t(classNum);
                }
            };

            samediff::Threads::parallel_for(func, 0, input->lengthOf());
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfBPTensors = tempRes.allTensorsAlongDimension(restDims);
            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //auto func = PRAGMA_THREADS_FOR {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    for (int e = 0; e < current->lengthOf(); e++) {
                        if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->t(e) - current->t(e)) < 1.e-6)
                            currentOut->t(e) = currentGradOut->t(e);
                    }
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }

        return ND4J_STATUS_OK;
    }

    // Dispatches on output->dataType() over NUMERIC_TYPES and returns the implementation's status.
    int unsortedSegmentMinFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
        BUILD_SINGLE_SELECTOR(output->dataType(), return unsortedSegmentMinFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), NUMERIC_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template int unsortedSegmentMinFunctorBP_, (nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output), NUMERIC_TYPES);

    // unsorted segment mean
    //
    // Gradient of the unsorted segment mean: every input position receives its segment's
    // incoming gradient divided by the number of positions carrying that segment id.
    // `context` is not used. Always returns ND4J_STATUS_OK.
    int unsortedSegmentMeanFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {

        // id -> number of positions carrying that id; entries 0..numOfClasses-1 start at 0
        std::map classCount;//(numClasses);

        for (Nd4jLong count = 0; count < numOfClasses; ++count) {
            classCount[count] = 0;
        }

        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
            classCount[indices->e(e)]++;
        }

        // if input is a vector: (as if in doc sample)
        if (input->isVector()) {
            for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
                Nd4jLong classNum = indices->e(e);
                output->p(e, gradOut->e(classNum) / classCount[classNum]);
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            for (int i = 0; i < indices->lengthOf(); i++) {
                Nd4jLong classNum = indices->e(i);
                // NOTE(review): `current` is fetched but not used.
                NDArray* current = listOfTensors.at(i);
                NDArray* currentOut = listOfOutTensors.at(i);
                NDArray* currentGradOut = listOfGradOuts.at(classNum);
                currentOut->assign(*currentGradOut / double(classCount[classNum]));
            }
        }
        return ND4J_STATUS_OK;
    }

    // unsorted segment sum
    //
    // Gradient of the unsorted segment sum: every input position receives a copy of its
    // segment's incoming gradient. `context` and `numOfClasses` are not used.
    // Always returns Status::OK().
    int unsortedSegmentSumFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {

        // if input is a vector: (as if in doc sample)
        // NOTE(review): `idx` is not referenced again in this function.
        Nd4jLong idx = indices->e(0);
        if (input->isVector()) {
            for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
                Nd4jLong classNum = indices->e(e);
                output->p(e, gradOut->e(classNum));
            }
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //auto func = PRAGMA_THREADS_FOR {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    currentOut->assign(currentGradOut);
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }
        return Status::OK();
    }

    // unsorted segment prod
    //
    // Gradient of the unsorted segment product: every input position receives
    // gradOut[segment] * product[segment] / input[position], where the product is
    // recomputed with unsortedSegmentProdFunctor into a copy of `gradOut`.
    // NOTE(review): the division by the input value is not guarded, so a zero input
    // element is not special-cased. Always returns Status::OK().
    int unsortedSegmentProdFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {

        // tempRes only borrows gradOut's shape; its contents are overwritten by the forward pass
        auto tempRes = gradOut->dup();
        unsortedSegmentProdFunctor(context, input, indices, numOfClasses, &tempRes);
        if (input->isVector()) {
            // start/stop/increment are presumably supplied by PRAGMA_THREADS_FOR
            auto func = PRAGMA_THREADS_FOR {
                for (auto e = start; e < stop; e += increment) {
                    auto classNum = indices->e(e);
                    output->p(e, gradOut->e(classNum) * tempRes.e(classNum) / input->e(e));
                }
            };

            samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfBPTensors = tempRes.allTensorsAlongDimension(restDims);
            ResultSet listOfGradOuts = gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);

            //auto func = PRAGMA_THREADS_FOR {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);
                    auto currentFFOut = listOfBPTensors.at(classNum);

                    // forward product * incoming gradient / own value, element-wise
                    currentOut->assign((*currentFFOut) * (*currentGradOut) / (*current));
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }

        return Status::OK();
    }

    // unsorted segment sqrt_n
    //
    // Gradient of the unsorted segment sqrt_n op: every input position receives its
    // segment's incoming gradient divided by sqrt(number of positions carrying that id).
    // `context` is not used. Always returns Status::OK().
//    template
    int unsortedSegmentSqrtNFunctorBP(nd4j::LaunchContext * context, NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
        // id -> number of positions carrying that id; entries 0..numOfClasses-1 start at 0
        std::map classCount;//(numClasses);

        for (Nd4jLong count = 0; count < numOfClasses; ++count) {
            classCount[count] = 0;
        }

        for (Nd4jLong e = 0; e < indices->lengthOf(); ++e) {
            classCount[indices->e(e)]++;
        }

        // if input is a vector: (as if in doc sample)
        if (input->isVector()) {
            //auto func = PRAGMA_THREADS_FOR {
                for (auto e = 0; e < indices->lengthOf(); e++) {
                    auto classNum = indices->e(e);
                    output->p(e, gradOut->e(classNum) / nd4j::math::nd4j_sqrt(classCount[classNum]));
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }
        else {
            auto restDims = ShapeUtils::evalDimsToExclude(input->rankOf(), {0});

            ResultSet listOfGradOuts =gradOut->allTensorsAlongDimension(restDims);
            ResultSet listOfTensors =input->allTensorsAlongDimension(restDims);
            ResultSet listOfOutTensors =output->allTensorsAlongDimension(restDims);

            //auto func = PRAGMA_THREADS_FOR {
                for (auto i = 0; i < indices->lengthOf(); i++) {
                    auto classNum = indices->e(i);
                    // `current` is only used for its length
                    auto current = listOfTensors.at(i);
                    auto currentOut = listOfOutTensors.at(i);
                    auto currentGradOut = listOfGradOuts.at(classNum);

                    for (int e = 0; e < current->lengthOf(); e++) {
                        currentOut->p(e, currentGradOut->e(e) / nd4j::math::nd4j_sqrt(classCount[classNum]));
                    }
                }
            //};

            //samediff::Threads::parallel_for(func, 0, indices->lengthOf());
        }
        return Status::OK();
    }

}
}
}