cavis/libnd4j/include/loops/cuda/inplace_loops/scalar_inplace.h

/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
//  @author raver119@gmail.com
//


#ifndef DEV_TESTS_SCALAR_INPLACE_H
#define DEV_TESTS_SCALAR_INPLACE_H

#include <ops.h>
#include <types/types.h>
#include <op_boilerplate.h>
#include <shape.h>

using namespace simdOps;

namespace functions {
    namespace scalar {
        /**
         * Device-side helpers that apply a scalar op to every element of an array.
         *
         * Type roles, as used by the member definitions further down in this file:
         *   X - type the scalar operand is read as
         *   Y - element type the input buffer `vy` is read as
         *   Z - element type of the output buffer `vz` and of the extra-params buffer
         *
         * Both members are device functions (`_CUDA_D`) and are meant to be called
         * from inside a kernel rather than launched directly.
         */
        template <typename X, typename Y, typename Z>
        class ScalarInplace {
        public:
            /**
             * Entry point taking a runtime op number; the op is resolved from
             * `opNum` and the call is forwarded to transformCuda.
             */
            static FORCEINLINE _CUDA_D void transformCudaLegacy(
                    int opNum,
                    void* vscalar,
                    void *vy,
                    Nd4jLong *yShapeInfo,
                    void *vparams,
                    void *vz,
                    Nd4jLong *zShapeInfo,
                    int *allocationBuffer);

            /**
             * Entry point with the op fixed at compile time through `OpClass`.
             */
            template <typename OpClass>
            static FORCEINLINE _CUDA_D void transformCuda(
                    void* vscalar,
                    void *vy,
                    Nd4jLong *yShapeInfo,
                    void *vparams,
                    void *vz,
                    Nd4jLong *zShapeInfo,
                    int *allocationBuffer);
        };

        /**
         * Runtime-dispatched variant of transformCuda.
         *
         * Passes the argument pack unchanged to DISPATCH_BY_OPNUM_TTT together with
         * the SCALAR_OPS list. The macro is defined outside this file; it is expected
         * to pick the op matching `opNum` and invoke transformCuda<Op> with these
         * arguments - NOTE(review): confirm against the macro definition.
         *
         * @param opNum            numeric id of the scalar op to run
         * @param vscalar          pointer to the scalar operand
         * @param vy               input buffer
         * @param yShapeInfo       shape descriptor for vy
         * @param vparams          extra op arguments
         * @param vz               output buffer
         * @param zShapeInfo       shape descriptor for vz
         * @param allocationBuffer forwarded as-is; not otherwise used here
         */
        template<typename X, typename Y, typename Z>
        FORCEINLINE _CUDA_D void ScalarInplace<X,Y,Z>::transformCudaLegacy(int opNum, void* vscalar,
                                                                    void *vy, Nd4jLong *yShapeInfo,
                                                                    void *vparams,
                                                                    void *vz, Nd4jLong *zShapeInfo,
                                                                    int *allocationBuffer) {

            // The macro relies on `opNum` and the X/Y/Z template parameters being in scope under exactly these names.
            DISPATCH_BY_OPNUM_TTT(transformCuda, PARAMS(vscalar, vy, yShapeInfo, vparams, vz, zShapeInfo, allocationBuffer), SCALAR_OPS);
        }

        /**
         * Applies the scalar op `OpType` element-wise: for every logical index i of y,
         * stores OpType::op(y[i], scalar, params) into the matching position of z.
         *
         * Work is split across the launch with a grid-stride loop using only the .x
         * components of the built-in index variables, i.e. a 1D launch is assumed.
         * The function executes a __syncthreads(), so every thread of a block must
         * call it, and it declares one __shared__ Nd4jLong per block.
         *
         * @param vscalar          pointer to the scalar operand; element 0 is read as X
         * @param vy               input buffer, read as Y*
         * @param yShapeInfo       shape descriptor of y; supplies the element count and
         *                         the per-index offsets into y
         * @param vparams          extra op arguments, handed to OpType::op as Z*
         * @param vz               output buffer, written as Z*
         * @param zShapeInfo       shape descriptor used to compute offsets into z
         * @param allocationBuffer not used by this function
         */
        template<typename X, typename Y, typename Z>
        template<typename OpType>
        FORCEINLINE _CUDA_D void ScalarInplace<X,Y,Z>::transformCuda(void* vscalar,
                                                              void *vy, Nd4jLong *yShapeInfo,
                                                              void *vparams,
                                                              void *vz, Nd4jLong *zShapeInfo,
                                                              int *allocationBuffer) {

            auto scalar = reinterpret_cast<X*>(vscalar)[0];
            auto y      = reinterpret_cast<Y*>(vy);
            auto params = reinterpret_cast<Z*>(vparams);
            auto z      = reinterpret_cast<Z*>(vz);

            // Widen BEFORE multiplying. The built-in index variables are 32-bit unsigned,
            // so gridDim.x * blockDim.x (and blockIdx.x * blockDim.x) can wrap on large
            // launches and then be narrowed to a negative/incorrect int. Doing the math
            // in Nd4jLong (the loop's own index type, expected to be 64-bit) avoids that.
            const Nd4jLong totalThreads = static_cast<Nd4jLong>(gridDim.x) * blockDim.x;
            const Nd4jLong tid = static_cast<Nd4jLong>(blockIdx.x) * blockDim.x + threadIdx.x;

            // One thread per block reads the element count; the barrier publishes it
            // to the rest of the block before anyone enters the loop.
            __shared__ Nd4jLong length;
            if (threadIdx.x == 0)
                length = shape::length(yShapeInfo);
            __syncthreads();

            // Grid-stride loop: each thread handles indices tid, tid + totalThreads, ...
            for (Nd4jLong i = tid; i < length; i += totalThreads) {
                const auto yOffset = shape::getIndexOffset(i, yShapeInfo, length);
                const auto zOffset = shape::getIndexOffset(i, zShapeInfo, length);
                z[zOffset] = OpType::op(y[yOffset], scalar, params);
            }
        }
    }
}

#endif //DEV_TESTS_SCALAR_INPLACE_H
Eclipse Migration Initial Commit 2019-06-06 14:21:15 +02:00			`/*******************************************************************************`
			`* Copyright (c) 2015-2018 Skymind, Inc.`
			`*`
			`* This program and the accompanying materials are made available under the`
			`* terms of the Apache License, Version 2.0 which is available at`
			`* https://www.apache.org/licenses/LICENSE-2.0.`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`* License for the specific language governing permissions and limitations`
			`* under the License.`
			`*`
			`* SPDX-License-Identifier: Apache-2.0`
			`******************************************************************************/`

			`//`
			`// @author raver119@gmail.com`
			`//`


			`#ifndef DEV_TESTS_SCALAR_INPLACE_H`
			`#define DEV_TESTS_SCALAR_INPLACE_H`

			`#include <ops.h>`
			`#include <types/types.h>`
			`#include <op_boilerplate.h>`
			`#include <shape.h>`

			`using namespace simdOps;`

			`namespace functions {`
			`namespace scalar {`
			`template <typename X, typename Y, typename Z>`
			`class ScalarInplace {`
			`public:`
			`static FORCEINLINE _CUDA_D void transformCudaLegacy(int opNum, void* vscalar, void *vy, Nd4jLong *yShapeInfo, void *vparams, void *vz, Nd4jLong *zShapeInfo, int *allocationBuffer);`

			`template <typename OpClass>`
			`static FORCEINLINE _CUDA_D void transformCuda(void* vscalar, void *vy, Nd4jLong *yShapeInfo, void *vparams, void *vz, Nd4jLong *zShapeInfo, int *allocationBuffer);`
			`};`

			`template<typename X, typename Y, typename Z>`
			`FORCEINLINE _CUDA_D void ScalarInplace<X,Y,Z>::transformCudaLegacy(int opNum, void* vscalar,`
			`void *vy, Nd4jLong *yShapeInfo,`
			`void *vparams,`
			`void *vz, Nd4jLong *zShapeInfo,`
			`int *allocationBuffer) {`

			`DISPATCH_BY_OPNUM_TTT(transformCuda, PARAMS(vscalar, vy, yShapeInfo, vparams, vz, zShapeInfo, allocationBuffer), SCALAR_OPS);`
			`}`

			`template<typename X, typename Y, typename Z>`
			`template<typename OpType>`
			`FORCEINLINE _CUDA_D void ScalarInplace<X,Y,Z>::transformCuda(void* vscalar,`
			`void *vy, Nd4jLong *yShapeInfo,`
			`void *vparams,`
			`void *vz, Nd4jLong *zShapeInfo,`
			`int *allocationBuffer) {`

			`auto scalar = reinterpret_cast<X*>(vscalar)[0];`
			`auto y = reinterpret_cast<Y*>(vy);`
			`auto params = reinterpret_cast<Z*>(vparams);`
			`auto z = reinterpret_cast<Z*>(vz);`

			`int totalThreads = gridDim.x * blockDim.x;`
			`int tid = blockIdx.x * blockDim.x + threadIdx.x;`

			`__shared__ Nd4jLong length;`
			`if(threadIdx.x == 0)`
			`length = shape::length(yShapeInfo);`
			`__syncthreads();`


			`for (Nd4jLong i = tid; i < length; i+= totalThreads) {`
			`z[shape::getIndexOffset(i, zShapeInfo, length)] = OpType::op(y[shape::getIndexOffset(i, yShapeInfo, length)], scalar, params);`
			`}`
			`}`
			`}`
			`}`

			`#endif //DEV_TESTS_SCALAR_INPLACE_H`