// cavis/libnd4j/include/loops/cuda/pairwise_int.cu

/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//  @author raver119@gmail.com
// @author Yurii Shyrma (iuriish@yahoo.com), created on 08.11.2018

#ifndef PAIRWISE_INT_CU
#define PAIRWISE_INT_CU


#include "../pairwise_int.h"


using namespace simdOps;

////////////////////////////////////////////////////////////////////////////////
/**
 * Pairwise element-wise kernel: for every linear index i in [0, len) stores
 * OpType::op(x[..], y[..], extraParams) into z[..]; len is read from xShapeInfo.
 *
 * Launch layout: only the .x components of blockIdx / blockDim / gridDim are
 * used. Every thread starts at its global index and advances by the total
 * number of launched threads, so any grid size covers all len elements.
 *
 * Shared memory: seven statically declared __shared__ scalars (ews, order and
 * length). They are written by thread 0 of each block and made visible to the
 * rest of the block by the __syncthreads() that follows.
 *
 * Addressing is selected per launch:
 *   1. all three element-wise strides are >= 1 and all three orders are equal:
 *      element i of each buffer is addressed as i * ews;
 *   2. otherwise, when vx == vz: the offset computed from xShapeInfo is used
 *      for both x and z (zShapeInfo is not consulted for offsets);
 *   3. otherwise: a separate offset is computed from each shape info.
 *
 * @tparam X            element type; vx, vy, vz and vextraParams are all reinterpreted as X*
 * @tparam OpType       type exposing a static op(x, y, extraParams)
 * @param vx            first input buffer
 * @param xShapeInfo    shape descriptor of x; also the source of len
 * @param vy            second input buffer
 * @param yShapeInfo    shape descriptor of y
 * @param vz            output buffer (may be the same pointer as vx)
 * @param zShapeInfo    shape descriptor of z
 * @param vextraParams  extra arguments forwarded unchanged to OpType::op
 */
template <typename X, typename OpType>
__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo,
											void *vy, Nd4jLong *yShapeInfo,
											void *vz, Nd4jLong *zShapeInfo,
											void *vextraParams) {

	auto x = reinterpret_cast<X*>(vx);
	auto y = reinterpret_cast<X*>(vy);
	auto z = reinterpret_cast<X*>(vz);
	auto extraParams = reinterpret_cast<X*>(vextraParams);

	// Widen before multiplying: blockIdx.x * blockDim.x and gridDim.x * blockDim.x
	// are 32-bit unsigned products and can wrap (or turn negative once stored in
	// an int) on large grids, while the loop index below is 64-bit.
	const Nd4jLong tid = static_cast<Nd4jLong>(blockIdx.x) * blockDim.x + threadIdx.x;
	const Nd4jLong totalThreads = static_cast<Nd4jLong>(gridDim.x) * blockDim.x;

	__shared__ int xEws;
	__shared__ int yEws;
	__shared__ int zEws;
	__shared__ char xOrder;
	__shared__ char yOrder;
	__shared__ char zOrder;
	__shared__ Nd4jLong len;

	// One thread per block reads the shape descriptors; the barrier publishes
	// the cached values to every other thread of the block.
	if (threadIdx.x == 0) {
		xEws = shape::elementWiseStride(xShapeInfo);
		yEws = shape::elementWiseStride(yShapeInfo);
		zEws = shape::elementWiseStride(zShapeInfo);
		xOrder = shape::order(xShapeInfo);
		yOrder = shape::order(yShapeInfo);
		zOrder = shape::order(zShapeInfo);
		len = shape::length(xShapeInfo);
	}
	__syncthreads();


	if (xEws >= 1 && yEws >= 1 && zEws >= 1 && xOrder == yOrder && xOrder == zOrder) {
		// Strided path: no per-element offset lookup needed.
		for (Nd4jLong i = tid; i < len; i += totalThreads) {
			z[i * zEws] = OpType::op(x[i * xEws], y[i * yEws], extraParams);
		}
	}
	else if (vx == vz) {
		// Same buffer for input and output: the x offset is reused for the store.
		for (Nd4jLong i = tid; i < len; i += totalThreads) {
			auto xOffset = shape::getIndexOffset(i, xShapeInfo);
			auto yOffset = shape::getIndexOffset(i, yShapeInfo);

			z[xOffset] = OpType::op(x[xOffset], y[yOffset], extraParams);
		}
	}
	else {
		// General path: every buffer is addressed through its own shape info.
		for (Nd4jLong i = tid; i < len; i += totalThreads) {
			auto xOffset = shape::getIndexOffset(i, xShapeInfo);
			auto yOffset = shape::getIndexOffset(i, yShapeInfo);
			auto zOffset = shape::getIndexOffset(i, zShapeInfo);

			z[zOffset] = OpType::op(x[xOffset], y[yOffset], extraParams);
		}
	}
}


namespace functions {
namespace pairwise_transforms {

////////////////////////////////////////////////////////////////////////////////
/**
 * Host-side launcher for pairwiseSimpleShaped<X, OpType>.
 *
 * The launch configuration is taken from launchDims: .x is passed as the grid
 * size, .y as the block size and .z as the dynamic shared-memory byte count.
 * The kernel is enqueued on *stream; no error status is queried here.
 *
 * All buffer and shape-info pointers are forwarded to the kernel unchanged.
 */
template <typename X>
template <typename OpType>
void _CUDA_H PairWiseIntTransform<X>::intermediateShaped(dim3& launchDims, cudaStream_t *stream,
														void *vx, Nd4jLong *xShapeInfo,
														void *vy, Nd4jLong *yShapeInfo,
														void *vz, Nd4jLong *zShapeInfo,
														void *vextraParams) {

	// Name the three launch parameters packed into the dim3.
	const auto gridSize = launchDims.x;
	const auto blockSize = launchDims.y;
	const auto sharedBytes = launchDims.z;

	pairwiseSimpleShaped<X, OpType><<<gridSize, blockSize, sharedBytes, *stream>>>(
		vx, xShapeInfo,
		vy, yShapeInfo,
		vz, zShapeInfo,
		vextraParams);
}


////////////////////////////////////////////////////////////////////////////////
/**
 * Entry point that dispatches on opNum and forwards every argument to
 * intermediateShaped.
 *
 * NOTE(review): DISPATCH_BY_OPNUM_T and PAIRWISE_INT_OPS are defined outside
 * this file; presumably the macro maps opNum to an OpType from that list and
 * calls intermediateShaped<OpType>(...) - confirm against the macro definition.
 */
template <typename X>
void PairWiseIntTransform<X>::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum,
												void *vx, Nd4jLong *xShapeInfo,
												void *vy, Nd4jLong *yShapeInfo,
												void *vz, Nd4jLong *zShapeInfo,
												void *vextraParams) {
	// Not referenced by name in this function; kept because the dispatch macro
	// below may expand to code that uses it - TODO confirm.
	auto xType = sd::DataTypeUtils::fromT<X>();

	DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_INT_OPS);
}

	// Presumably emits explicit instantiations of PairWiseIntTransform for each
	// type in INTEGER_TYPES (macro defined outside this file) - confirm.
	BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT PairWiseIntTransform, , INTEGER_TYPES);
} // namespace pairwise_transforms
} // namespace functions

#endif // PAIRWISE_INT_CU
[WIP] Int broadcastables (#195) * Removed invalid resource and fixed tests Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> 2019-08-30 10:12:40 +03:00			`/*******************************************************************************`
			`* Copyright (c) 2015-2018 Skymind, Inc.`
			`*`
			`* This program and the accompanying materials are made available under the`
			`* terms of the Apache License, Version 2.0 which is available at`
			`* https://www.apache.org/licenses/LICENSE-2.0.`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`* License for the specific language governing permissions and limitations`
			`* under the License.`
			`*`
			`* SPDX-License-Identifier: Apache-2.0`
			`******************************************************************************/`

			`// @author raver119@gmail.com`
			`// @author Yurii Shyrma (iuriish@yahoo.com), created on 08.11.2018`

			`#ifndef PAIRWISE_INT_CU`
			`#define PAIRWISE_INT_CU`


			`#include "../pairwise_int.h"`


			`using namespace simdOps;`

			`////////////////////////////////////////////////////////////////////////////////`
			`template <typename X, typename OpType>`
			`__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo,`
			`void *vy, Nd4jLong *yShapeInfo,`
			`void *vz, Nd4jLong *zShapeInfo,`
			`void *vextraParams) {`

			`auto x = reinterpret_cast<X*>(vx);`
			`auto y = reinterpret_cast<X*>(vy);`
			`auto z = reinterpret_cast<X*>(vz);`
			`auto extraParams = reinterpret_cast<X*>(vextraParams);`

			`int tid = blockIdx.x * blockDim.x + threadIdx.x;`

			`__shared__ int xEws;`
			`__shared__ int yEws;`
			`__shared__ int zEws;`
			`__shared__ char xOrder;`
			`__shared__ char yOrder;`
			`__shared__ char zOrder;`
			`__shared__ Nd4jLong len;`

			`if (threadIdx.x == 0) {`
			`xEws = shape::elementWiseStride(xShapeInfo);`
			`yEws = shape::elementWiseStride(yShapeInfo);`
			`zEws = shape::elementWiseStride(zShapeInfo);`
			`xOrder = shape::order(xShapeInfo);`
			`yOrder = shape::order(yShapeInfo);`
			`zOrder = shape::order(zShapeInfo);`
			`len = shape::length(xShapeInfo);`
			`}`
			`__syncthreads();`


			`if (xEws >= 1 && yEws >= 1 && zEws >= 1 && xOrder == yOrder && xOrder == zOrder) {`
			`for (Nd4jLong i = tid; i < len; i += gridDim.x * blockDim.x) {`
			`z[i * zEws] = OpType::op(x[i * xEws], y[i * yEws], extraParams);`
			`}`
			`}`
			`else if (vx == vz) {`
			`for (Nd4jLong i = tid; i < len; i += gridDim.x * blockDim.x) {`
[WIP] bunch of improvements (#257) * - profiling bias_add op - add some docementation Signed-off-by: Yurii <yurii@skymind.io> * - minor change Signed-off-by: Yurii <yurii@skymind.io> * - provide addBias cuda kernel Signed-off-by: Yurii <yurii@skymind.io> * - improve shape::getIndexOfffset and change its signature Signed-off-by: Yurii <yurii@skymind.io> * - same as previous Signed-off-by: Yurii <yurii@skymind.io> * - improve and change signature in some shape:: stuff which has to do with calculation of offsets for array elements Signed-off-by: Yurii <yurii@skymind.io> * - minor changes in flatten Signed-off-by: Yurii <shyrma@skymind.io> * - add function shape::getIndexOffsetOrdered Signed-off-by: Yurii <shyrma@skymind.io> * - correct shape::getIndexOffsetOrdered() Signed-off-by: Yurii <shyrma@skymind.io> * - move getIndexOffsetOrdered to flatten.h header in order to isolate this function Signed-off-by: Yurii <shyrma@skymind.io> 2019-09-11 20:12:09 +03:00			`auto xOffset = shape::getIndexOffset(i, xShapeInfo);`
			`auto yOffset = shape::getIndexOffset(i, yShapeInfo);`

[WIP] Int broadcastables (#195) * Removed invalid resource and fixed tests Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> 2019-08-30 10:12:40 +03:00			`z[xOffset] = OpType::op(x[xOffset], y[yOffset], extraParams);`
			`}`
			`}`
			`else {`
			`for (Nd4jLong i = tid; i < len; i += gridDim.x * blockDim.x) {`
[WIP] bunch of improvements (#257) * - profiling bias_add op - add some docementation Signed-off-by: Yurii <yurii@skymind.io> * - minor change Signed-off-by: Yurii <yurii@skymind.io> * - provide addBias cuda kernel Signed-off-by: Yurii <yurii@skymind.io> * - improve shape::getIndexOfffset and change its signature Signed-off-by: Yurii <yurii@skymind.io> * - same as previous Signed-off-by: Yurii <yurii@skymind.io> * - improve and change signature in some shape:: stuff which has to do with calculation of offsets for array elements Signed-off-by: Yurii <yurii@skymind.io> * - minor changes in flatten Signed-off-by: Yurii <shyrma@skymind.io> * - add function shape::getIndexOffsetOrdered Signed-off-by: Yurii <shyrma@skymind.io> * - correct shape::getIndexOffsetOrdered() Signed-off-by: Yurii <shyrma@skymind.io> * - move getIndexOffsetOrdered to flatten.h header in order to isolate this function Signed-off-by: Yurii <shyrma@skymind.io> 2019-09-11 20:12:09 +03:00			`auto xOffset = shape::getIndexOffset(i, xShapeInfo);`
			`auto yOffset = shape::getIndexOffset(i, yShapeInfo);`
			`auto zOffset = shape::getIndexOffset(i, zShapeInfo);`
[WIP] Int broadcastables (#195) * Removed invalid resource and fixed tests Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> 2019-08-30 10:12:40 +03:00
			`z[zOffset] = OpType::op(x[xOffset], y[yOffset], extraParams);`
			`}`
			`}`
			`}`


			`namespace functions {`
			`namespace pairwise_transforms {`

			`////////////////////////////////////////////////////////////////////////////////`
			`template<typename X>`
			`template<typename OpType>`
			`void _CUDA_H PairWiseIntTransform<X>::intermediateShaped(dim3& launchDims, cudaStream_t *stream,`
			`void *vx, Nd4jLong *xShapeInfo,`
			`void *vy, Nd4jLong *yShapeInfo,`
			`void *vz, Nd4jLong *zShapeInfo,`
			`void *vextraParams){`

			`pairwiseSimpleShaped<X, OpType><<<launchDims.x, launchDims.y, launchDims.z, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams);`
			`}`


			`////////////////////////////////////////////////////////////////////////////////`
			`template<typename X>`
			`void PairWiseIntTransform<X>::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) {`
libnd4j polishing (#273) * initial set of include changes Signed-off-by: raver119 <raver119@gmail.com> * one more tweak Signed-off-by: raver119 <raver119@gmail.com> * few more rearrangements Signed-off-by: raver119 <raver119@gmail.com> * few more rearrangements Signed-off-by: raver119 <raver119@gmail.com> * few more rearrangements Signed-off-by: raver119 <raver119@gmail.com> * cuda includes rearrangements Signed-off-by: raver119 <raver119@gmail.com> * java update Signed-off-by: raver119 <raver119@gmail.com> * = namespace changed to sd - few CMake variables renamed with SD_ prefix Signed-off-by: raver119 <raver119@gmail.com> * java update Signed-off-by: raver119 <raver119@gmail.com> * LoopKind minor fix Signed-off-by: raver119 <raver119@gmail.com> * few more changes Signed-off-by: raver119 <raver119@gmail.com> * few more changes Signed-off-by: raver119 <raver119@gmail.com> * few more changes Signed-off-by: raver119 <raver119@gmail.com> * sanitizer is optional now Signed-off-by: raver119 <raver119@gmail.com> * dev tests updated Signed-off-by: raver119 <raver119@gmail.com> * few more changes Signed-off-by: raver119 <raver119@gmail.com> * last update Signed-off-by: raver119 <raver119@gmail.com> * java update Signed-off-by: raver119 <raver119@gmail.com> 2020-03-02 12:49:41 +03:00			`auto xType = sd::DataTypeUtils::fromT<X>();`
[WIP] Int broadcastables (#195) * Removed invalid resource and fixed tests Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> * legacy scalar/pairwise/broadcast int ops Signed-off-by: raver119 <raver119@gmail.com> * NDArray int broadcastables Signed-off-by: raver119 <raver119@gmail.com> * few more bitwise tests Signed-off-by: raver119 <raver119@gmail.com> * java side update Signed-off-by: raver119 <raver119@gmail.com> * Argument type changed for shift ops Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com> 2019-08-30 10:12:40 +03:00
			`DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_INT_OPS);`
			`}`

			`BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT PairWiseIntTransform, , INTEGER_TYPES);`
			`}`
			`}`

			`#endif // PAIRWISE_INT_CU`