/* ******************************************************************************
 *
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

/*
 *
 * Created on: Dec 28, 2015
 * Author: agibsonccc
 */

#ifndef INDEXREDUCE_H_
#define INDEXREDUCE_H_

// NOTE(review): every bare `#include` below has no operand in this copy of the
// file. Presumably the `<...>` header names were stripped by whatever produced
// this copy (the two quoted includes survived) -- restore them from the
// original header before compiling.
#include
#ifdef _OPENMP
#include
#endif
#include
#include
#include
#include
#include
#ifdef __CUDACC__
#include
#include
#endif
#include
#include "system/pairwise_util.h"
#include "legacy_ops.h"

namespace functions {
namespace indexreduce {

/**
 * Static entry points for the "index reduce" loops.
 *
 * Only declarations are visible in this header; no member is defined here.
 * All members are static. Which set is declared depends on __CUDABLAS__:
 *   - __CUDABLAS__ defined: `__device__` routines (transform,
 *     aggregatePartials) and the `_CUDA_H` launch wrappers
 *     (executeIndexReduceScalar, executeIndexReduce), which additionally take
 *     a `dim3` launch configuration and a `cudaStream_t *`.
 *   - otherwise: execScalar / exec.
 * In both sets there are overloads that take a runtime integer operation
 * number (`opNum` / `op`) and templated overloads that do not.
 *
 * NOTE(review): the `template` keywords here and below carry no parameter
 * list, and `IndexValue` is written without template arguments. Presumably the
 * `<...>` text was stripped together with the include operands -- confirm
 * against the original header.
 *
 * NOTE(review): parameter roles are not documented in this file. From the
 * names, x/dx and result look like input/output buffers described by the
 * *ShapeInfo arrays, dimension/dimensionLength look like the axes being
 * reduced, and tad* look like per-sub-array shape/offset tables -- confirm
 * against the definitions.
 */
template class IndexReduce {
 public:
#ifdef __CUDABLAS__
  // Device-callable transform that receives the operation as a runtime
  // integer (opNum).
  static __device__ void transform(int opNum, const void *x, const Nd4jLong *xShapeInfo,
                                   void *extraParams, void *result,
                                   const Nd4jLong *resultShapeInfo,
                                   int *dimension,int dimensionLength,
                                   int postProcessOrNot, int *allocationBuffer,
                                   void *reductionBuffer, const Nd4jLong *tadShapeInfo,
                                   const Nd4jLong *tadOffset);

  // Device-callable helper operating on an IndexValue array (sPartialsRef)
  // given a thread id and an element count.
  // NOTE(review): presumably the block-level combine step of the reduction;
  // confirm against the definition.
  template static __device__ void aggregatePartials(IndexValue *sPartialsRef, Nd4jLong tid,
                                                     Nd4jLong numElements, void *extraParams);

  // Templated device-callable transform; same argument list as the opNum
  // overload above minus the opNum argument.
  template static __device__ void transform(const void *dx, const Nd4jLong *xShapeInfo,
                                             void *extraParams, void *result,
                                             const Nd4jLong *resultShapeInfo,
                                             int *dimension, int dimensionLength,
                                             int postProcessOrNot, int *allocationBuffer,
                                             void *reductionBuffer,
                                             const Nd4jLong *tadOnlyShapeInfo,
                                             const Nd4jLong *tadOffsets);

  // Launch wrapper taking launch dimensions and a stream pointer in addition
  // to the reduction arguments and the ranks of x and result.
  // NOTE(review): _CUDA_H is a macro defined outside this file, presumably a
  // host-side qualifier -- confirm.
  static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, int op,
                                               const void *dx, const Nd4jLong *xShapeInfo,
                                               int xRank, void *extraParams, void *result,
                                               const Nd4jLong *resultShapeInfo, int zRank,
                                               int *dimension, int dimensionLength,
                                               int postProcessOrNot, int *allocationBuffer,
                                               void *reductionBuffer,
                                               const Nd4jLong *tadOnlyShapeInfo,
                                               const Nd4jLong *tadOffsets);

  // Same signature as executeIndexReduceScalar.
  // NOTE(review): presumably the variant that reduces along `dimension`
  // rather than to a single scalar -- confirm.
  static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, int op,
                                         const void *dx, const Nd4jLong *xShapeInfo,
                                         int xRank, void *extraParams, void *result,
                                         const Nd4jLong *resultShapeInfo, int zRank,
                                         int *dimension, int dimensionLength,
                                         int postProcessOrNot, int *allocationBuffer,
                                         void *reductionBuffer,
                                         const Nd4jLong *tadOnlyShapeInfo,
                                         const Nd4jLong *tadOffsets);
#else
  // Runtime-dispatched (opNum) reduction of x that returns an Nd4jLong.
  // NOTE(review): the return value is presumably the selected element index.
  static Nd4jLong execScalar(int opNum, const void *x, const Nd4jLong *xShapeInfo,
                             void *extraParams);

  // Runtime-dispatched (opNum) variant that writes into `result` instead of
  // returning a value.
  static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams,
                   void *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension,
                   int dimensionLength, const Nd4jLong *tadShapeInfo,
                   const Nd4jLong *tadOffset);

  // Templated counterpart of execScalar(opNum, ...): same arguments without
  // opNum.
  template static _CUDA_H Nd4jLong execScalar(const void *x, const Nd4jLong *xShapeInfo,
                                               void *extraParams);

  // Templated counterpart of exec(opNum, ...): same arguments without opNum.
  template static _CUDA_H void exec(const void *x, const Nd4jLong *xShapeInfo,
                                     void *extraParams, void *result,
                                     const Nd4jLong *resultShapeInfoBuffer, int *dimension,
                                     int dimensionLength, const Nd4jLong *tadShapeInfo,
                                     const Nd4jLong *tadOffset);
#endif
};

}
}

#endif /* INDEXREDUCE_H_ */