diff --git a/libnd4j/blas/cpu/NDArrayFactory.cpp b/libnd4j/blas/cpu/NDArrayFactory.cpp index ec99ef7db..d8b686b12 100644 --- a/libnd4j/blas/cpu/NDArrayFactory.cpp +++ b/libnd4j/blas/cpu/NDArrayFactory.cpp @@ -172,7 +172,9 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); diff --git a/libnd4j/include/array/impl/NDArrayList.cpp b/libnd4j/include/array/impl/NDArrayList.cpp index 1762565a1..75df72e70 100644 --- a/libnd4j/include/array/impl/NDArrayList.cpp +++ b/libnd4j/include/array/impl/NDArrayList.cpp @@ -137,8 +137,8 @@ namespace nd4j { auto newAxis = ShapeUtils::evalDimsToExclude(array->rankOf(), args); auto result = array->allTensorsAlongDimension(newAxis); for (int e = 0; e < result->size(); e++) { - auto chunk = result->at(e)->dup(array->ordering()); - write(e, chunk); + auto chunk = result->at(e);//->dup(array->ordering()); + write(e, chunk->dup(array->ordering())); } delete result; } diff --git a/libnd4j/include/op_boilerplate.h b/libnd4j/include/op_boilerplate.h index ad37a1618..ac30b28d8 100644 --- a/libnd4j/include/op_boilerplate.h +++ b/libnd4j/include/op_boilerplate.h @@ -1328,7 +1328,7 @@ REGISTER_C(NAME) \ nd4j::ShapeList* nd4j::ops::NAME::calculateOutputShape(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) { \ auto shapeList = SHAPELIST(); \ - for (int e = 0; e < this->getOpDescriptor()->getNumberOfOutputs(); e++) { \ + for (int e = 0; e < block.width(); e++) { \ auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ @@ -1365,7 +1365,7 @@ REGISTER_C(NAME) \ nd4j::ShapeList* nd4j::ops::NAME::calculateOutputShape(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) { \ auto shapeList = SHAPELIST(); \ - for (int e = 0; e < this->getOpDescriptor()->getNumberOfOutputs(); e++) { \ + for (int e = 0; e < block.width(); e++) { \ Nd4jLong* newshape; \ COPY_SHAPE(inputShape->at(0), newshape); \ shapeList->push_back(CONSTANT(newshape)); \ @@ -1388,7 +1388,7 @@ REGISTER_C(NAME) \ nd4j::ShapeList* nd4j::ops::NAME::calculateOutputShape(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) 
{ \ auto shapeList = SHAPELIST(); \ - for (int e = 0; e < this->getOpDescriptor()->getNumberOfOutputs(); e++) { \ + for (int e = 0; e < block.width(); e++) { \ auto newshape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inputShape->at(e)), shape::order(inputShape->at(e)), shape::rank(inputShape->at(e)), shape::shapeOf(inputShape->at(e))); \ shapeList->push_back(newshape); \ } \ diff --git a/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp b/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp index d554d46ef..1813946d0 100644 --- a/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp +++ b/libnd4j/include/ops/declarable/generic/bitwise/toggle_bits.cpp @@ -27,14 +27,14 @@ namespace nd4j { namespace ops { - OP_IMPL(toggle_bits, -1, -1, true) { + OP_IMPL(toggle_bits, -1, 1, true) { for (int i = 0; i < block.width(); i++) { auto x = INPUT_VARIABLE(i); auto z = OUTPUT_VARIABLE(i); REQUIRE_TRUE(x->dataType() == z->dataType(), 0, "Toggle bits requires input and output to have same type"); - REQUIRE_TRUE(x->isR(),0, "Toggle bits requires input and output to be integer type (int8, int16, int32, int64)"); + REQUIRE_TRUE(x->isZ(),0, "Toggle bits requires input and output to be integer type (int8, int16, int32, int64)"); helpers::__toggle_bits(block.launchContext(), *x, *z); } @@ -44,7 +44,8 @@ namespace nd4j { DECLARE_TYPES(toggle_bits) { getOpDescriptor() ->setAllowedInputTypes({ALL_INTS}) - ->setSameMode(true); + ->setAllowedOutputTypes({ALL_INTS}) + ->setSameMode(false); } } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp index 59a7b7546..2cee857fa 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_double.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_double, 1, 1, true) { + CUSTOM_OP_IMPL(to_double, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,12 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::DOUBLE); } + + DECLARE_SHAPE_FN(to_double) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::DOUBLE, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp index 8cdd38e4f..d3e2f3cd0 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_float16.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_float16, 1, 1, true) { + CUSTOM_OP_IMPL(to_float16, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,12 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::HALF); } + + DECLARE_SHAPE_FN(to_float16) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::HALF, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp index 3fdcafaab..c558f4d78 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_float32.cpp @@ 
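The op_boilerplate.h hunks above change the shape-function loop bound from the descriptor's declared output count to block.width(). For variadic ops such as toggle_bits the descriptor presumably reports -1 outputs, so the old loop produced nothing useful; the number of outputs actually tracks the number of inputs wired into the node, which is exactly what block.width() returns. A minimal sketch of the resulting pass-through shape function, using only the helpers already visible in the hunks:

```cpp
// Sketch (not the macro itself): pass-through shape function for a variadic op.
// block.width() is the number of input arrays attached to the node; for ops like
// toggle_bits that is also the number of outputs, while the descriptor says -1.
nd4j::ShapeList* passthroughShapes(nd4j::ShapeList* inputShape, nd4j::graph::Context& block) {
    auto shapeList = SHAPELIST();
    for (int e = 0; e < block.width(); e++) {
        auto in = inputShape->at(e);
        // reuse the input's dtype, order, rank and dimensions for the matching output
        shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(
                ArrayOptions::dataType(in), shape::order(in), shape::rank(in), shape::shapeOf(in)));
    }
    return shapeList;
}
```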
-25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_float32, 1, 1, true) { + CUSTOM_OP_IMPL(to_float32, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,12 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::FLOAT32); } + + DECLARE_SHAPE_FN(to_float32) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::FLOAT32, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp index a5eef8595..960aab483 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_int32.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_int32, 1, 1, true) { + CUSTOM_OP_IMPL(to_int32, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,11 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::INT32); } + DECLARE_SHAPE_FN(to_int32) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::INT32, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp index 450c57c1d..d5721b266 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_int64.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_int64, 1, 1, true) { + CUSTOM_OP_IMPL(to_int64, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,11 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::INT64); } + DECLARE_SHAPE_FN(to_int64) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::INT64, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp index 5b6822797..b0833dd25 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_uint32.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_uint32, 1, 1, true) { + CUSTOM_OP_IMPL(to_uint32, 1, 1, true, 0, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -40,8 +40,13 @@ namespace nd4j { DECLARE_TYPES(to_uint32) { getOpDescriptor() ->setAllowedInputTypes(nd4j::DataType::ANY) - ->setAllowedOutputTypes(nd4j::DataType::INT16); + ->setAllowedOutputTypes(nd4j::DataType::INT32); } + DECLARE_SHAPE_FN(to_uint32) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::UINT32, true, block.workspace()); + return SHAPELIST(outShape); + } + } } diff --git a/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp b/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp index a0402cdb7..64633d34b 100644 --- a/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp +++ b/libnd4j/include/ops/declarable/generic/datatypes/to_uint64.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(to_uint64, 1, 1, true) { + CUSTOM_OP_IMPL(to_uint64, 1, 1, true, 0, 0) { auto 
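Each to_* cast op above follows the same recipe: the element-wise body stays, but the op is promoted to a CUSTOM_OP so it gains a shape function that copies the input's shape and order while forcing the target data type via ShapeBuilders::copyShapeInfoAndType. A hedged, test-style usage sketch of what that buys (names and scaffolding mirror the DeclarableOpsTests files later in this diff; this particular test is hypothetical):

```cpp
// Hypothetical test sketch: with the explicit shape function, to_double now yields a
// DOUBLE output even for an integral input, instead of inheriting the input's type.
TEST_F(DeclarableOpsTests7, ToDouble_usage_sketch) {
    auto x = NDArrayFactory::create<int>('c', {2, 2}, {1, 2, 3, 4});

    nd4j::ops::to_double op;
    auto result = op.execute({&x}, {}, {});
    ASSERT_EQ(Status::OK(), result->status());

    auto z = result->at(0);
    ASSERT_EQ(nd4j::DataType::DOUBLE, z->dataType()); // dtype comes from the new shape function
    ASSERT_TRUE(x.isSameShape(z));                     // shape and order are copied from the input

    delete result;
}
```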
input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); @@ -42,6 +42,10 @@ namespace nd4j { ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(nd4j::DataType::INT8); } + DECLARE_SHAPE_FN(to_uint64) { + auto outShape = ShapeBuilders::copyShapeInfoAndType(inputShape->at(0), DataType::UINT64, true, block.workspace()); + return SHAPELIST(outShape); + } } } diff --git a/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp b/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp index 886959943..b5e5f207e 100644 --- a/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp +++ b/libnd4j/include/ops/declarable/generic/list/unstack_list.cpp @@ -26,13 +26,19 @@ namespace nd4j { namespace ops { LIST_OP_IMPL(unstack_list, 1, 1, 0, 0) { - auto input = INPUT_VARIABLE(0); + auto outputList = INPUT_LIST(0); + auto input = INPUT_VARIABLE(int(outputList != nullptr) ); - auto list = new NDArrayList(0, true); - list->unstack(input, 0); + if (outputList == nullptr) { + outputList = new NDArrayList(0, true); + //block.trackList(outputList); + setupResultList(outputList, block); + } + outputList->unstack(input, INT_ARG(0)); //OVERWRITE_RESULT(list); - setupResultList(list, block); + + // return Status::OK(); } } diff --git a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp index e8acfa067..2161a2378 100644 --- a/libnd4j/include/ops/declarable/generic/random/get_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/get_seed.cpp @@ -26,11 +26,11 @@ namespace nd4j { namespace ops { CUSTOM_OP_IMPL(get_seed, -2, 1, false, 0, 0) { - REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); - auto rng = block.getRNG(); +// REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); + auto rng = block.getRng(); auto z = OUTPUT_VARIABLE(0); - z->p(Nd4jLong(0), rng->getSeed()); + z->p(Nd4jLong(0), rng.rootState()); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp index b42c7c763..fa9dcf992 100644 --- a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp @@ -27,8 +27,9 @@ namespace nd4j { namespace ops { CUSTOM_OP_IMPL(set_seed, -2, 1, false, 0, -2) { - REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); - auto rng = block.getRNG(); +// REQUIRE_TRUE(block.getRNG() != nullptr, 0, "RNG should be defined in Graph"); + auto rng = block.getRng(); //.getRNG(); + Nd4jLong seed = 0; if (block.getIArguments()->size() > 0) { seed = INT_ARG(0); @@ -41,8 +42,8 @@ namespace nd4j { } // FIXME: this approach isn't really good for cuda, since it'll assume that CUDA might get nullptr instead of stream - refreshBuffer(nullptr, seed, (Nd4jPointer) rng); - + //refreshBuffer(nullptr, seed, (Nd4jPointer) rng); + rng.setSeed((int)seed); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp b/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp index 1398eae47..3d45bcf42 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/log1p.cpp @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { - OP_IMPL(Log1p, 2, 1, true) { + OP_IMPL(Log1p, 1, 1, true) { auto x = INPUT_VARIABLE(0); auto z = OUTPUT_VARIABLE(0); diff --git 
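get_seed and set_seed above switch from the legacy block.getRNG() pointer to the graph-level RandomGenerator returned by block.getRng(): the seed is applied with setSeed() and read back through rootState(). A compact sketch of the round trip, limited to the calls visible in the hunks (Context& block assumed in scope; purely illustrative):

```cpp
// Sketch of the seed round trip used by the updated ops.
auto rng = block.getRng();              // graph-level RandomGenerator, no null check needed
rng.setSeed((int) 119);                 // set_seed path: reseed the generator
auto z = OUTPUT_VARIABLE(0);
z->p(Nd4jLong(0), rng.rootState());     // get_seed path: expose the root state as the seed
```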
a/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp b/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp index f087eaf1b..f6aeeebcf 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/merge_max_idx.cpp @@ -27,7 +27,7 @@ namespace nd4j { namespace ops { -OP_IMPL(mergemaxindex, -1, 1, false) { +CUSTOM_OP_IMPL(mergemaxindex, -1, 1, false, 0, 0) { REQUIRE_OK(this->validateInputDimensionsMatch(block)); auto output = OUTPUT_VARIABLE(0); @@ -49,6 +49,15 @@ DECLARE_SYN(MergeMaxIndex, mergemaxindex); ->setAllowedInputTypes({ALL_INTS, ALL_FLOATS}); } } +DECLARE_SHAPE_FN(mergemaxindex) { + auto in = inputShape->at(0); + auto dtype = DataType::INT32; + if (block.getIArguments()->size()> 0) + dtype = (DataType)INT_ARG(0); + + auto resShape = ShapeBuilders::copyShapeInfoAndType(in, dtype, block.workspace()); + return SHAPELIST(resShape); +} } #endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/headers/datatypes.h b/libnd4j/include/ops/declarable/headers/datatypes.h index 43983ecb6..d8ff39d48 100644 --- a/libnd4j/include/ops/declarable/headers/datatypes.h +++ b/libnd4j/include/ops/declarable/headers/datatypes.h @@ -30,7 +30,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_double) - DECLARE_OP(to_double, 1, 1, true); + DECLARE_CUSTOM_OP(to_double, 1, 1, true, 0, 0); #endif /** @@ -39,7 +39,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_float16) - DECLARE_OP(to_float16, 1, 1, true); + DECLARE_CUSTOM_OP(to_float16, 1, 1, true, 0, 0); #endif /** @@ -48,7 +48,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_float32) - DECLARE_OP(to_float32, 1, 1, true); + DECLARE_CUSTOM_OP(to_float32, 1, 1, true, 0, 0); #endif /** @@ -57,7 +57,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_int32) - DECLARE_OP(to_int32, 1, 1, true); + DECLARE_CUSTOM_OP(to_int32, 1, 1, true, 0, 0); #endif /** @@ -66,7 +66,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_int64) - DECLARE_OP(to_int64, 1, 1, true); + DECLARE_CUSTOM_OP(to_int64, 1, 1, true, 0, 0); #endif /** @@ -75,7 +75,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. */ #if NOT_EXCLUDED(OP_to_uint32) - DECLARE_OP(to_uint32, 1, 1, true); + DECLARE_CUSTOM_OP(to_uint32, 1, 1, true, 0, 0); #endif /** @@ -84,7 +84,7 @@ namespace nd4j { * PLEASE NOTE: This op is disabled atm, and reserved for future releases. 
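mergemaxindex above becomes a CUSTOM_OP so its shape function can emit an integer output over the common input shape, INT32 by default or whatever INT_ARG(0) requests. As a reference for what the helper computes, here is a hedged host-side sketch of the element-wise semantics; it matches the corrected mergeMaxIndex kernel in merge.cu further down, where the running maximum and its index must be updated together:

```cpp
#include <cstddef>
#include <vector>

// Host-side reference sketch (not the library kernel): for every element position, emit the
// index of the input tensor holding the maximum value there. Inputs are flat buffers of equal
// length standing in for NDArrays of identical shape.
std::vector<int> mergeMaxIndexReference(const std::vector<std::vector<float>>& inputs) {
    std::vector<int> out(inputs.empty() ? 0 : inputs[0].size(), 0);
    for (std::size_t e = 0; e < out.size(); e++) {
        float mVal = inputs[0][e];
        int mIdx = 0;
        for (std::size_t i = 1; i < inputs.size(); i++) {
            if (mVal < inputs[i][e]) {   // the index and the running max must be updated together
                mVal = inputs[i][e];
                mIdx = static_cast<int>(i);
            }
        }
        out[e] = mIdx;
    }
    return out;
}
```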
*/ #if NOT_EXCLUDED(OP_to_uint64) - DECLARE_OP(to_uint64, 1, 1, true); + DECLARE_CUSTOM_OP(to_uint64, 1, 1, true, 0, 0); #endif /** diff --git a/libnd4j/include/ops/declarable/headers/transforms.h b/libnd4j/include/ops/declarable/headers/transforms.h index b24fad482..75715f78e 100644 --- a/libnd4j/include/ops/declarable/headers/transforms.h +++ b/libnd4j/include/ops/declarable/headers/transforms.h @@ -65,9 +65,15 @@ namespace nd4j { #if NOT_EXCLUDED(OP_mergemax) DECLARE_OP(mergemax, -1, 1, false); #endif - + /* + * Complete tensor with max indices merged from all input tensors list + * + * INPUT: tensors with the same shape + * OUTPUT: integer tensor with the same shape + * INT_ARG: result type (one of int), INT32 by default + */ #if NOT_EXCLUDED(OP_mergemaxindex) - DECLARE_OP(mergemaxindex, -1, 1, false); + DECLARE_CUSTOM_OP(mergemaxindex, -1, 1, false, 0, 0); #endif #if NOT_EXCLUDED(OP_mergeadd) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu index eda19ccd8..52b059dad 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu @@ -25,7 +25,7 @@ namespace nd4j { namespace ops { namespace helpers { template - void _CUDA_G histogramKernel(void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, double min_val, double max_val) { + void _CUDA_G histogramKernel(void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, X* min_val, X* max_val) { int tid = blockIdx.x * blockDim.x + threadIdx.x; auto dx = reinterpret_cast(xBuffer); auto result = reinterpret_cast(zBuffer); @@ -42,19 +42,19 @@ namespace nd4j { } __syncthreads(); - Z binSize = (max_val - min_val) / (numBins); + X binSize = X((*max_val - *min_val) / numBins); for (int e = threadIdx.x; e < numBins; e += blockDim.x) { - bins[e] = (Z) 0.0f; + bins[e] = (Z) 0; } __syncthreads(); - for (int e = tid; e < length; e+= blockDim.x * gridDim.x) { - int idx = (int) ((dx[e] - min_val) / binSize); - if (idx < 0) idx = 0; - else if (idx >= numBins) idx = numBins - 1; - - nd4j::math::atomics::nd4j_atomicAdd(&bins[idx], (Z) 1.0f); + for (int e = tid; e < length; e += blockDim.x * gridDim.x) { + int idx = int((dx[e] - *min_val) / binSize); + idx = math::nd4j_max(idx, 0); //atomicMax(&idx, 0);//atomicMax(&idx, 0); + idx = math::nd4j_min(idx, int(numBins - 1)); //atomicMin(&idx, int(numBins - 1)); + nd4j::math::atomics::nd4j_atomicAdd(&bins[idx], (Z)1); +// bins[idx]++; } __syncthreads(); @@ -82,7 +82,7 @@ namespace nd4j { // nullify shared memory for future accumulation for (int e = threadIdx.x; e < numBins; e += blockDim.x) { - bins[e] = (Z) 0.0f; + bins[e] = (Z) 0; } // accumulate reduced bins @@ -90,7 +90,7 @@ namespace nd4j { Z *ptrBuf = ((Z *)allocationPointer) + (r * numBins); for (int e = threadIdx.x; e < numBins; e += blockDim.x) { - bins[e] += ptrBuf[e]; + math::atomics::nd4j_atomicAdd(&bins[e], ptrBuf[e]); } } __syncthreads(); @@ -109,24 +109,26 @@ namespace nd4j { } template - static void histogram_(nd4j::LaunchContext *context, void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, double min_val, double max_val) { + static void histogram_(nd4j::LaunchContext *context, void *xBuffer, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, void* 
min_val, void* max_val) { int numThreads = 256; int numBlocks = nd4j::math::nd4j_max(256, nd4j::math::nd4j_min(1, shape::length(xShapeInfo) / numThreads)); int workspaceSize = numBlocks * numBins; - auto tmp = NDArrayFactory::create('c',{workspaceSize}); + auto tmp = NDArrayFactory::create('c', {workspaceSize}); - histogramKernel<<getCudaStream()>>>(xBuffer, xShapeInfo, zBuffer, zShapeInfo, tmp.getSpecialBuffer(), context->getReductionPointer(), numBins, min_val, max_val); + histogramKernel<<getCudaStream()>>>(xBuffer, dxShapeInfo, zBuffer, zShapeInfo, tmp.getSpecialBuffer(), context->getReductionPointer(), numBins, reinterpret_cast(min_val), reinterpret_cast(max_val)); cudaStreamSynchronize(*context->getCudaStream()); } void histogramHelper(nd4j::LaunchContext *context, NDArray &input, NDArray &output) { Nd4jLong numBins = output.lengthOf(); - double min_val = input.reduceNumber(reduce::SameOps::Min).e(0); - double max_val = input.reduceNumber(reduce::SameOps::Max).e(0); - - BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.specialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), numBins, min_val, max_val), LIBND4J_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({&output}, {&input}); + auto min_val = input.reduceNumber(reduce::SameOps::Min); + auto max_val = input.reduceNumber(reduce::SameOps::Max); +// min_val.printIndexedBuffer("MIN"); +// max_val.printIndexedBuffer("MAX"); + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.shapeInfo(), input.specialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), numBins, min_val.specialBuffer(), max_val.specialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&output}, {&input}); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu index 2cec0a065..0da1fbc28 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu @@ -68,21 +68,21 @@ namespace helpers { static __global__ void shouldSelectKernel(T* boxesBuf, Nd4jLong* boxesShape, I* indexBuf, I* selectedIndicesData, double threshold, int numSelected, int i, bool* shouldSelect) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = gridDim.x * blockDim.x; - __shared__ bool shouldSelectShared; + __shared__ unsigned int shouldSelectShared; if (threadIdx.x == 0) { - shouldSelectShared = shouldSelect[0]; + shouldSelectShared = (unsigned int)shouldSelect[0]; } __syncthreads(); for (int j = numSelected - 1 - tid; j >= 0; j -= step) { if (shouldSelectShared) { if (needToSuppressWithThreshold(boxesBuf, boxesShape, indexBuf[i], indexBuf[selectedIndicesData[j]], T(threshold))) - shouldSelectShared = false; + atomicCAS(&shouldSelectShared, 1, 0); } } __syncthreads(); if (threadIdx.x == 0) { - *shouldSelect = shouldSelectShared; + *shouldSelect = shouldSelectShared > 0; } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu index ceb748453..082472fce 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu @@ -48,8 +48,10 @@ namespace nd4j { auto x = reinterpret_cast(inArrs[i]); auto xShape = reinterpret_cast(inShapes[i]); auto val = x[shape::getIndexOffset(e, xShape, length)];; - if (mVal < val) - 
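The shouldSelectKernel change in image_suppression.cu above replaces a __shared__ bool, which multiple threads could race on with plain stores, by a 32-bit unsigned int cleared through atomicCAS; only thread 0 publishes the result back to global memory. A stripped-down sketch of the pattern, with a hypothetical predicate standing in for needToSuppressWithThreshold:

```cuda
// Minimal sketch of the shared-flag pattern: many threads may clear the flag, exactly one
// thread writes it out. atomicCAS needs a 32-bit word, hence unsigned int rather than bool.
__global__ void anyPredicateKernel(const float* data, int n, bool* shouldSelect) {
    __shared__ unsigned int flag;
    if (threadIdx.x == 0)
        flag = (unsigned int) shouldSelect[0];       // seed from the previous value
    __syncthreads();

    for (int j = threadIdx.x; j < n; j += blockDim.x) {
        if (flag && data[j] > 0.5f)                  // hypothetical suppression predicate
            atomicCAS(&flag, 1u, 0u);                // race-free "set to false"
    }
    __syncthreads();

    if (threadIdx.x == 0)
        shouldSelect[0] = flag > 0;
}
```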
mIdx = static_cast(e); + if (mVal < val) { + mIdx = static_cast(i); + mVal = val; + } } __syncthreads(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu index da212d287..6a9fd28e6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu @@ -123,14 +123,236 @@ namespace nd4j { nSamplingKernel<<<1,1,128, *stream>>>(vsyn0, vsyn1Neg, vexpTable, vneu1e, alpha, vectorLength, code, expLength, isInference); } + /* + * binarySearch - find element in haystack buffer (haystack - sorted device memory) + * */ int binarySearch(const int *haystack, const int needle, const int totalElements) { - return 0; + int firstIndex = 0; + int lastIndex = totalElements - 1; + int halfIndex = nd4j::math::nd4j_floor((lastIndex + firstIndex) / (float) 2); + + while(haystack[halfIndex] != needle && firstIndex < lastIndex) { + if (needle < haystack[halfIndex]) { + lastIndex = halfIndex - 1; + } else if (needle > haystack[halfIndex]) { + firstIndex = halfIndex + 1; + } + halfIndex = nd4j::math::nd4j_floor((lastIndex + firstIndex) / (float) 2); + } + + return (haystack[halfIndex] == needle) ? halfIndex : -1; + } + template + __global__ void addInfVectorKernel(T* neu1, T* infVector, int vectorLength) { + auto start = blockIdx.x * blockDim.x + threadIdx.x; + auto step = blockDim.x * gridDim.x; + + for (auto i = start; i < vectorLength; i += step) { + neu1[i] += infVector[i]; + } } - void skipgram(NDArray &syn0, NDArray &syn1, NDArray &syn1Neg, NDArray &expTable, NDArray &negTable, NDArray &target, NDArray &ngStarter, int nsRounds, NDArray &indices, NDArray &codes, NDArray &alpha, NDArray &randomValue, NDArray &inferenceVector, const bool preciseMode, const int numWorkers) { + template + void skipgram_(NDArray& s0, NDArray& s1, NDArray& s1n, NDArray& expTableV, NDArray& negTableV, NDArray& infV, int target, int ngStarter, NDArray& indices, NDArray& codes, double alpha, Nd4jLong randomValue, const int hsRounds, const int nsRounds) { +// void *vsyn0, void *vsyn1, void *vsyn1Neg, void *vexpTable, void *vnegTable, void *vinfVector, int target, int ngStarter, int *indices, int8_t *codes, double alpha, Nd4jLong randomValue, const int hsRounds, const int nsRounds, const int vocabSize, const int vectorLength, const int expLength, const int negLength) { + auto syn0 = reinterpret_cast(s0.specialBuffer()); + auto syn1 = reinterpret_cast(s1.specialBuffer()); + auto syn1Neg = reinterpret_cast(s1n.specialBuffer()); + auto expTable = reinterpret_cast(expTableV.specialBuffer()); + auto negTable = reinterpret_cast(negTableV.specialBuffer()); + auto infVector = reinterpret_cast(infV.specialBuffer()); + const int vocabSize = s0.sizeAt(0); + const int vectorLength = s0.sizeAt(1); + const int expLength = expTableV.lengthOf(); + const int negLength = negTableV.lengthOf(); + indices.tickReadDevice(); + indices.syncToHost(); + codes.tickReadDevice(); + codes.syncToHost(); + auto stream = s0.getContext()->getCudaStream(); + + T* neu1e; // = new T[vectorLength]; + //memset(neu1e, 0, vectorLength * sizeof(T)); + auto err = cudaMalloc(&neu1e, sizeof(T) * vectorLength); + err = cudaMemset(neu1e, 0, sizeof(T) * vectorLength); + // hierarchic softmax goes first (if enabled) + + auto syn0row = infVector != nullptr ? 
infVector : syn0 + (target * vectorLength); + auto irow = 0; + if (hsRounds > 0) { + for (int r = 0; r < hsRounds; r++) { + irow = indices.t(r); + if (irow < 0 || irow >= vocabSize) + break; + + hSoftmax_(syn0row, syn1 + (irow * vectorLength), expTable, neu1e, alpha, vectorLength, codes.t(r), expLength, infVector != nullptr, stream); + } + } + + // negative sampling goes second (if enabled) + auto nsStarter = ngStarter; + irow = nsStarter; + if (nsRounds > 0) { + for (int r = 0; r < nsRounds + 1; r++) { + if (r == 0) { + // target is known in advance + } else { + randomValue = randomValue * (unsigned long long) 25214903917 + 11; + auto idx = nd4j::math::nd4j_abs((randomValue >> 16) % negLength); + irow = idx >= negLength ? -1 : negTableV.e(idx); + + if (irow < 0 || irow >= vocabSize) irow = randomValue % (vocabSize - 1) + 1; + if (irow == nsStarter) + continue; + } + + nSampling_(syn0row, syn1Neg + (irow * vectorLength), expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, infVector != nullptr, stream); + } + } + + if (infVector == nullptr) { + addInfVectorKernel<<<128, 256, 256, *stream>>>(syn0row, neu1e, vectorLength); + } else { + addInfVectorKernel<<<128, 256, 256, *stream>>>(infVector, neu1e, vectorLength); + } + + err = cudaFree(neu1e); + if (0 != err) { + throw cuda_exception::build("helpers::skipgram_: Cannot deallocate temp memory for lingual net", err); + } + } + BUILD_SINGLE_TEMPLATE(template void skipgram_, (NDArray& syn0, NDArray& syn1, NDArray& syn1Neg, NDArray& expTable, NDArray& negTable, NDArray& infVector, int target, int ngStarter, NDArray& indices, NDArray& codes, double alpha, Nd4jLong randomValue, const int hsRounds, const int nsRounds), FLOAT_TYPES); + + /* + * batched version of skipgram routine + * */ + template + void skipgramBatchExec_(NDArray &s0, NDArray &s1, NDArray &s1n, NDArray& expTableV, NDArray& negTableV, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const bool preciseMode, const int numThreads) { +// (NDArray &s0, NDArray &s1, NDArray &s1n, NDArray& expTable, NDArray& negTable, NDArray& infVector, NDArray& targets, NDArray& negStarters, NDArray& indices, NDArray& codes, NDArray& lr, NDArray& nextRandom, const int nsRounds, const bool preciseMode, const int numThreads) { + //auto syn0 = reinterpret_cast(vsyn0); + //auto syn1 = reinterpret_cast(vsyn1); + //auto syn1Neg = reinterpret_cast(vsyn1Neg); + auto stream = s0.getContext()->getCudaStream(); + negTableV.tickReadDevice(); + negTableV.syncToHost(); + const auto expTable = reinterpret_cast(expTableV.specialBuffer()); + const auto negTable = reinterpret_cast(negTableV.buffer()); + const auto infVector = (T*)nullptr; //reinterpret_cast(infVector.specialBuffer()); + + const int vocabSize = s0.sizeAt(0); + const int vectorLength = s0.sizeAt(1); + const int expLength = expTableV.lengthOf(); + const int negLength = negTableV.lengthOf(); + + //T sneu1e[600]; + + //const auto numThreads = omp_get_max_threads(); + const auto idxShift = indices.isEmpty() ? 0 : indices.sizeAt(1); + const auto hsRounds = codes.isEmpty() ? 
0 : codes.sizeAt(1); + + // regular mode provides 0 guarantees for reproducibility + auto numTargets = targets.lengthOf(); + targets.syncToHost(); + indices.syncToHost(); + codes.syncToHost(); + lr.syncToHost(); + nextRandom.syncToHost(); + negStarters.tickReadDevice(); + negStarters.syncToHost(); + auto bTarget = reinterpret_cast(targets.buffer()); //targets.bufferAsT(); + auto bIndices = reinterpret_cast(indices.buffer()); //indices.bufferAsT(); + auto bCodes = reinterpret_cast(codes.buffer()); //codes.bufferAsT(); + +// PRAGMA_OMP_PARALLEL_FOR_ARGS(num_threads(numThreads)) + for (int t = 0; t < numTargets; t++) { + T* neu1e;//lvectorLength <= 600 ? sneu1e : new T[vectorLength]; + auto err = cudaMalloc(&neu1e, vectorLength * sizeof(T)); + err = cudaMemset(neu1e, 0, vectorLength * sizeof(T)); + //memset(neu1e, 0, vectorLength * sizeof(T)); + + auto target = bTarget[t]; + auto alpha = lr.e(t); + unsigned long long randomValue = nextRandom.e(t); + + auto syn0row = reinterpret_cast(s0.specialBuffer()) + (target * vectorLength); + + if (hsRounds > 0) { + int irow = 0; + auto cShift = t * idxShift; + + for (int e = 0; e < hsRounds; e++) { + irow = bIndices[e + cShift]; + if (irow < 0 || irow >= vocabSize) + continue; + + auto syn1row = reinterpret_cast(s1.getSpecialBuffer()) + (irow * vectorLength); + auto code = bCodes[e + cShift]; + + //nd4j_printf("syn0: [%i]; syn1: [%i]; code: [%i]\n", target, irow, code); + hSoftmax_(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, code, expLength, false, stream); + } + } + + + if (nsRounds > 0) { + int irow = negStarters.e(t); + int nsStarter = irow; + for (int r = 0; r < nsRounds + 1; r++) { + if (r == 0) { + // target is known in advance + } else { + randomValue = randomValue * (unsigned long long) 25214903917 + 11; + auto idx = nd4j::math::nd4j_abs((randomValue >> 16) % negLength); + irow = idx >= negLength ? -1 : static_cast(negTable[idx]); + + if (irow < 0 || irow >= vocabSize) + irow = randomValue % (vocabSize - 1) + 1; + + if (irow == nsStarter) + continue; + } + auto syn1row = reinterpret_cast(s1n.getSpecialBuffer()) + (irow * vectorLength); + + nSampling_(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, false, stream); + } + } + addInfVectorKernel<<<128, 256, 256, *stream>>>(syn0row, neu1e, vectorLength); + + // optionally release temp arrays + err = cudaFree(neu1e); + if (err != 0) { + break; + } +// if (vectorLength > 600) +// delete[] neu1e; + } + } + BUILD_SINGLE_TEMPLATE(template void skipgramBatchExec_, (NDArray &s0, NDArray &s1, NDArray &s1n, NDArray& expTable, NDArray& negTable, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const bool preciseMode, const int numThreads), FLOAT_TYPES); + + void skipgram(NDArray &syn0, NDArray &syn1, NDArray &syn1Neg, NDArray &expTable, NDArray &negTable, + NDArray &target, NDArray &ngStarter, int nsRounds, NDArray &indices, NDArray &codes, NDArray &alpha, NDArray &randomValue, NDArray &inferenceVector, const bool preciseMode, const int numWorkers) { auto xType = syn0.dataType(); - + // single round case + if ((ngStarter.isScalar() && !ngStarter.isEmpty())|| (target.isScalar() && !target.isEmpty())) { + auto hsRounds = codes.lengthOf(); + target.syncToHost(); + ngStarter.syncToHost(); + alpha.syncToHost(); + randomValue.syncToHost(); + + auto targetV = target.isEmpty() ? -1 : target.e(0); + auto starterV = ngStarter.isEmpty() ? 
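Both skipgram_ and skipgramBatchExec_ above draw negative samples with the same inline generator: the state is advanced with the java.util.Random LCG constants (25214903917, 11), the bits above position 16 index the pre-built unigram negTable, and out-of-range rows are remapped into the vocabulary (hits on the starter row are skipped by the caller). A self-contained host-side sketch of one draw, with negTable, negLength and vocabSize as assumed inputs:

```cpp
#include <cstdlib>

// Hedged sketch of a single negative-sample draw as used in the skipgram helpers above.
// Advances randomValue in place and returns the sampled row.
int drawNegativeSample(unsigned long long& randomValue, const int* negTable,
                       long long negLength, int vocabSize) {
    randomValue = randomValue * 25214903917ULL + 11;          // java.util.Random LCG step
    auto idx = std::llabs((long long) ((randomValue >> 16) % negLength));
    int irow = idx >= negLength ? -1 : negTable[idx];         // unigram table lookup
    if (irow < 0 || irow >= vocabSize)                        // remap invalid rows
        irow = (int) (randomValue % (vocabSize - 1)) + 1;
    return irow;
}
```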
-1 : ngStarter.e(0); + auto alphaV = alpha.e(0); + auto randomV = randomValue.e(0); + BUILD_SINGLE_SELECTOR(xType, skipgram_, (syn0, syn1, syn1Neg, expTable, negTable, inferenceVector, targetV, starterV, indices, codes, alphaV, randomV, hsRounds, nsRounds), FLOAT_TYPES); + } else if (ngStarter.isVector() || target.isVector()){ + // batch mode +// NDArray& infVector, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const bool preciseMode, const int numThreads) + BUILD_SINGLE_SELECTOR(xType, skipgramBatchExec_, (syn0, syn1, syn1Neg, expTable, negTable, target, ngStarter, indices, codes, alpha, randomValue, nsRounds, preciseMode, numWorkers), FLOAT_TYPES); + } else + throw std::runtime_error("SkipGram: target must have rank 0 or 1"); } + template static __global__ void checkContextKernel(int* context, T* syn0, T* neu1, int contextWidth, int vectorLength, int vocabSize) { __shared__ bool hasError; @@ -157,16 +379,6 @@ namespace nd4j { } } - template - __global__ void addInfVectorKernel(T* neu1, T* infVector, int vectorLength) { - auto start = blockIdx.x * blockDim.x + threadIdx.x; - auto step = blockDim.x * gridDim.x; - - for (auto i = start; i < vectorLength; i += step) { - neu1[i] += infVector[i]; - } - } - template __global__ void shiftKernel(T* neu1, T* infVector, int contextWidth, int vectorLength) { auto start = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu b/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu index c8db7b0e1..f90c9f77f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/toggle_bits.cu @@ -26,7 +26,13 @@ namespace nd4j { namespace helpers { template void toggle_bits__(NDArray &in, NDArray &out) { + NDArray::prepareSpecialUse({&out}, {&in}); + auto lambda = LAMBDA_T(_x) { + return ~_x;//eUtils::flip_bits(_x); + }; + in.applyLambda(lambda, &out); + NDArray::registerSpecialUse({&out}, {&in}); } BUILD_SINGLE_TEMPLATE(template void toggle_bits__, (NDArray &in, NDArray &out), INTEGER_TYPES); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu index bb311ed01..a457d4c94 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu @@ -685,13 +685,12 @@ void clipByNormBP(nd4j::LaunchContext* context, const NDArray& input, const NDAr BUILD_SINGLE_TEMPLATE(template void randomShuffle_, (nd4j::LaunchContext * context, NDArray& input, NDArray& output, nd4j::graph::RandomGenerator& rng, const bool isInplace), LIBND4J_TYPES); - ////////////////////////////////////////////////////////////////////////// void eye(nd4j::LaunchContext * context, NDArray& output) { + output.setIdentity(); } - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template static __global__ void clipByNormInplaceKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong* shape, Nd4jLong* inputOffsets, T* norm2Buf, Nd4jLong* norm2shape, T clipNorm) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp index c2af3cef4..d02ddcb69 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests2.cpp @@ -502,6 +502,7 @@ TEST_F(DeclarableOpsTests2, 
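The toggle_bits CUDA helper above now applies a plain bitwise NOT through a lambda, which is exactly what the ToggleBits tests further down expect: in two's-complement arithmetic ~x equals -x - 1, so toggling the bits of 2 gives -3 and toggling 1 gives -2. A compile-time check:

```cpp
#include <cstdint>

// Two's-complement sanity check for the toggle_bits semantics used above and asserted
// by ToggleBits_test1 / ToggleBits_test2 below.
static_assert(~std::int32_t{2} == -3, "~x == -x - 1 for two's-complement integers");
static_assert(~std::int32_t{1} == -2, "matches the second output of ToggleBits_test2");
```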
Test_FloorDiv_2) { auto x = NDArrayFactory::create('c', {1, 3}, {3.0, 6.0, -3.0}); auto y = NDArrayFactory::create('c', {1, 3}, {-2.0, 2.0, -2.0}); auto eps = NDArrayFactory::create('c', {1, 3}, {1, 2, 3}); + auto exp1 = NDArrayFactory::create('c', {1, 3}, {0.f, 0.f, 0.f}); auto exp2 = NDArrayFactory::create('c', {1, 3}, {0.f, 0.f, 0.f}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp index 2e1833548..b596ebcd5 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp @@ -223,21 +223,221 @@ TEST_F(DeclarableOpsTests5, Test_Boolean_diff_1) { delete result; } +TEST_F(DeclarableOpsTests5, Test_SetSeed_1) { + auto x = NDArrayFactory::create('c', {1, 1}, {120}); + auto y = NDArrayFactory::create(5); + nd4j::ops::set_seed op; + auto result = op.execute({&x, &y}, {}, {120, 5}, {}, false, nd4j::DataType::INT32); + ASSERT_EQ(Status::OK(), result->status()); +// result->at(0)->printIndexedBuffer("RES SEED"); + nd4j::ops::get_seed getOp; + auto getRes = getOp.execute({}, {}, {}); + ASSERT_EQ(Status::OK(), getRes->status()); +// getRes->at(0)->printIndexedBuffer("Output RES GET SEED"); +// ASSERT_EQ(result->at(0)->t(0), true); + delete result; + delete getRes; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, scatterMul_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + NDArray idc('c', {1}, {0LL}, nd4j::DataType::INT64); + auto updates = NDArrayFactory::create('c', {1, 2}, {10, 1}); + auto exp = NDArrayFactory::create('c', {2, 2}, {10, 2, 3, 4}); + nd4j::ops::scatter_mul op; + auto result = op.execute({&matrix, &idc, &updates}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + auto z = result->at(0); + ASSERT_TRUE(exp.equalsTo(z)); + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, scatterDiv_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + NDArray idc('c', {1}, {0LL}, nd4j::DataType::INT64); + auto updates = NDArrayFactory::create('c', {1, 2}, {10, 1}); + auto exp = NDArrayFactory::create('c', {2, 2}, {0.10, 2, 3, 4}); + nd4j::ops::scatter_div op; + auto result = op.execute({&matrix, &idc, &updates}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + auto z = result->at(0); +// z->printIndexedBuffer("Scatter Div"); + ASSERT_TRUE(exp.equalsTo(z)); + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, scatterSub_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + NDArray idc('c', {1}, {0LL}, nd4j::DataType::INT64); + auto updates = NDArrayFactory::create('c', {1, 2}, {10, 1}); + auto exp = NDArrayFactory::create('c', {2, 2}, {-9, 1, 3, 4}); + nd4j::ops::scatter_sub op; + auto result = op.execute({&matrix, &idc, &updates}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + auto z = result->at(0); +// z->printIndexedBuffer("Scatter Sub"); + ASSERT_TRUE(exp.equalsTo(z)); + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardsigmoid_test1) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {2, 2}, {0.7, 0.9, 1, 1}); + + nd4j::ops::hardsigmoid op; + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, 
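The scatter tests above all exercise one pattern: row idc[k] of the reference matrix is combined elementwise with row k of updates, and every other row is left untouched. For scatterMul_test1 that means row 0 becomes {1 * 10, 2 * 1} = {10, 2}, giving the expected {10, 2, 3, 4}. A tiny host-side sketch of that semantics (multiplication shown; scatter_sub and scatter_div only swap the operator):

```cpp
#include <cstddef>
#include <vector>

// Hedged reference sketch of scatter_mul on a row-major matrix: for each k, multiply
// row indices[k] elementwise by row k of updates. Not the library kernel, just the
// arithmetic the tests above verify.
void scatterMulRows(std::vector<float>& matrix, std::size_t cols,
                    const std::vector<std::size_t>& indices,
                    const std::vector<float>& updates) {
    for (std::size_t k = 0; k < indices.size(); k++)
        for (std::size_t c = 0; c < cols; c++)
            matrix[indices[k] * cols + c] *= updates[k * cols + c];
}
// e.g. matrix = {1, 2, 3, 4} viewed as 2x2, cols = 2, indices = {0}, updates = {10, 1}
// leaves matrix as {10, 2, 3, 4}, matching scatterMul_test1.
```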
result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Hadrdsigmoid 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardsigmoid_test2) { + auto matrix = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + auto eps = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {2, 2}, {0.2, 0.4, 0, 0}); + + nd4j::ops::hardsigmoid_bp op; + auto result = op.execute({&matrix, &eps}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Hadrdsigmoid 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardtanh_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {3, 3}, {-1, -1, -1, -1, 0, 1, 1, 1, 1}); + + nd4j::ops::hardtanh op; + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Hardtanh 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, hardtanh_test2) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto eps = NDArrayFactory::create('c', {3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9}); + auto exp = NDArrayFactory::create('c', {3, 3}, {0, 0, 0, 4, 5, 6, 0, 0, 0}); + + nd4j::ops::hardtanh_bp op; + auto result = op.execute({&matrix, &eps}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Hardtanh_bp 2x2"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, histogram_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto exp = NDArrayFactory::create('c', {3}, {3, 3, 3}); + + nd4j::ops::histogram op; + auto result = op.execute({&matrix}, {}, {3}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Histogram3"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, histogram_test2) { + auto matrix = NDArrayFactory::create('c', {3}, {1, 2, 1}); + auto exp = NDArrayFactory::create('c', {4}, {2, 0, 0, 1}); + + nd4j::ops::histogram op; + auto result = op.execute({&matrix}, {}, {4}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Histogram4"); + ASSERT_TRUE(exp.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, Identity_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); +// auto exp = NDArrayFactory::create('c', {3, 3}, {3, 3, 3}); + + nd4j::ops::identity op; + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("Histogram3"); + ASSERT_TRUE(matrix.equalsTo(z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, Identity_test2) { + auto matrix = 
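histogram_test2 above is a handy worked check of the kernel math changed in histogram.cu: for input {1, 2, 1} and 4 bins, min = 1 and max = 2, so binSize = (max - min) / numBins = 0.25; both 1s fall into bin (1 - 1) / 0.25 = 0, while 2 computes to bin (2 - 1) / 0.25 = 4 and is clamped to numBins - 1 = 3, producing {2, 0, 0, 1}. The same clamping yields {3, 3, 3} for histogram_test1. A host-side sketch of the indexing:

```cpp
#include <algorithm>
#include <vector>

// Host-side sketch of the bin indexing used by histogramKernel above, clamping included.
// Assumes max > min, as in the tests.
std::vector<long long> histogramReference(const std::vector<double>& x, long long numBins) {
    auto mn = *std::min_element(x.begin(), x.end());
    auto mx = *std::max_element(x.begin(), x.end());
    double binSize = (mx - mn) / (double) numBins;
    std::vector<long long> bins(numBins, 0);
    for (double v : x) {
        auto idx = (long long) ((v - mn) / binSize);
        idx = std::max(idx, 0LL);            // same clamping as nd4j_max / nd4j_min in the kernel
        idx = std::min(idx, numBins - 1);
        bins[idx]++;
    }
    return bins;
}
// histogramReference({1, 2, 1}, 4) == {2, 0, 0, 1}, matching histogram_test2.
```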
NDArrayFactory::create('c', {3, 3}, {-4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto eps = NDArrayFactory::create('c', {3, 3}, {1,2,3,4,5,6,7,8,9}); +// auto exp = NDArrayFactory::create('c', {3,3}); + nd4j::ops::identity_bp op; + auto result = op.execute({&matrix, &eps}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Identity_BP"); + ASSERT_TRUE(z->equalsTo(eps)); + + delete result; +} +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, Log1p_test1) { + auto matrix = NDArrayFactory::create('c', {3, 3}, {4, 3, 2, 1, 0, 1, 2, 3, 4}); + auto y = NDArrayFactory::create('c', {3,3}, {5,4,3,2,1,2,3,4,5}); + // auto eps = NDArrayFactory::create('c', {3, 3}, {1,2,3,4,5,6,7,8,9}); +// auto exp = NDArrayFactory::create('c', {3,3}); + nd4j::ops::Log1p op; + y.applyTransform(nd4j::transform::Log, nullptr, nullptr); + auto result = op.execute({&matrix}, {}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, result->status()); + + auto z = result->at(0); + z->printIndexedBuffer("Log1p"); + ASSERT_TRUE(z->equalsTo(y)); + + delete result; +} TEST_F(DeclarableOpsTests5, Test_SpaceToBatch_1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp index 24701f70f..81086594d 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp @@ -737,6 +737,44 @@ TEST_F(DeclarableOpsTests6, cumSum_20) { delete result; } +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMergeMaxIndex_1) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto z = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 20.f, 3.f, 40.f, 5.f, 60.f, 7.f, 80.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}, {1, 2, 1, 2, 1, 2, 1, 2}); + nd4j::ops::mergemaxindex op; + + auto ress = op.execute({&x, &y, &z}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MergeMaxIndex Result is "); +// ress->at(0)->printShapeInfo("Shape info for MergeMaxIdex"); +// x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMergeMaxIndex_2) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto z = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 20.f, 3.f, 40.f, 5.f, 60.f, 7.f, 80.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}, {1, 2, 1, 2, 1, 2, 1, 2}); + nd4j::ops::mergemaxindex op; + + auto ress = op.execute({&x, &y, &z}, {}, {nd4j::DataType::INT64}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MergeMaxIndex2 Result is "); +// ress->at(0)->printShapeInfo("Shape info for MergeMaxIdex2"); +// x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests6, TestDropout_1) { @@ -752,8 +790,60 @@ TEST_F(DeclarableOpsTests6, TestDropout_1) { delete ress; } 
+//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMod_1) { + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}, {1, 0, 3, 0, 5, 0, 7, 0}); + nd4j::ops::mod op; + auto ress = op.execute({&x, &y}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MOD Result is "); +// x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestMod_BP_1) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto eps = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto exp = NDArrayFactory::create('c', {2, 2, 2}); + nd4j::ops::mod_bp op; + + auto ress = op.execute({&x, &y, &eps}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); +// ress->at(0)->printIndexedBuffer("MOD_BP Result is "); + + // x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} + +/////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests6, TestRank_1) { + + auto x = NDArrayFactory::create('c', {2, 2, 2}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + auto y = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto eps = NDArrayFactory::create('c', {2, 2, 2}, {10.f, 2.f, 30.f, 4.f, 50.f, 6.f, 70.f, 8.f}); + auto exp = NDArrayFactory::create(3); + nd4j::ops::rank op; + + auto ress = op.execute({&x}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, ress->status()); + ress->at(0)->printIndexedBuffer("RANK Result is "); + + // x.printIndexedBuffer("Input is"); + ASSERT_TRUE(ress->at(0)->equalsTo(exp)); + delete ress; +} TEST_F(DeclarableOpsTests6, TestDropout_2) { // auto x0 = NDArrayFactory::create('c', {10, 10}); // auto x1 = NDArrayFactory::create('c', {10, 10}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp index febb65c21..996dd4f23 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp @@ -24,6 +24,7 @@ #include #include #include +#include using namespace nd4j; @@ -3605,6 +3606,289 @@ TEST_F(DeclarableOpsTests7, transpose_test3) { delete result; } +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rationaltanh_test1) { + + auto input = NDArrayFactory::create('c', {8}, {0, 1, 2, 3, 4, 5, 6, 7}); + NDArray exp = NDArrayFactory::create({0.000000, 0.998222, 1.516093, 1.658054, 1.695077, 1.706884, 1.711427, 1.713446}); + + nd4j::ops::rationaltanh op; + auto result = op.execute({&input}, {}, {}); + auto output = result->at(0); +// output->printIndexedBuffer("Output rationaltanh"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rationaltanh_test2) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 
7}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {0.000000, 0.998222, 1.516093, 1.658054, 1.695077, 1.706884, 1.711427, 1.713446}); + + nd4j::ops::rationaltanh op; + auto result = op.execute({&input}, {}, {}); + auto output = result->at(0); +// output->printIndexedBuffer("Output rationaltanh"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rationaltanh_test3) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 7}); + auto eps = NDArrayFactory::create('c', {2,2,2}, {1, 2, 3, 4, 5, 6, 7, 8}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {1.143933, 1.605747, 0.795557, 0.261710, 0.095832, 0.041218, 0.020221, 0.010971}); + + nd4j::ops::rationaltanh_bp op; + auto result = op.execute({&input, &eps}, {}, {}); + auto output = result->at(0); +// output->printBuffer("Output rationaltanh BP"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rectifiedtanh_test1) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 7}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {0.000000, 0.761594, 0.964028, 0.995055, 0.999329, 0.999909, 0.999988, 0.999998}); + + nd4j::ops::rectifiedtanh op; + auto result = op.execute({&input}, {}, {}); + auto output = result->at(0); +// output->printIndexedBuffer("Output rectifiedtanh"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, rectifiedtanh_test2) { + + auto input = NDArrayFactory::create('c', {2,2,2}, {0, 1, 2, 3, 4, 5, 6, 7}); + auto eps = NDArrayFactory::create('c', {2,2,2}, {1, 2, 3, 4, 5, 6, 7, 8}); + NDArray exp = NDArrayFactory::create('c', {2,2,2}, {0.000000, 0.839949, 0.211952, 0.039464, 0.006705, 0.001089, 0.000172, 0.000027}); + + nd4j::ops::rectifiedtanh_bp op; + auto result = op.execute({&input, &eps}, {}, {}); + auto output = result->at(0); +// output->printBuffer("Output rectifiedtanh BP"); + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, RealDiv_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e = NDArrayFactory::create('c', {1, 2, 2}, {2, 1, 4, 2}); + + nd4j::ops::realdiv op; + auto result = op.execute({&x, &y}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput RealDiv"); + ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, RealDiv_BP_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e0 = NDArrayFactory::create('c', {1, 2, 1}, {2, 5}); + NDArray e1 = NDArrayFactory::create('c', {1, 2}, {-14, -5}); + NDArray eps = NDArrayFactory::create('c', {1, 2, 2}, {1, 2, 3, 4}); + + nd4j::ops::realdiv_bp op; + auto result = op.execute({&x, &y, &eps}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z0 = result->at(0); + auto z1 = 
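The rectifiedtanh_test1 expectations above coincide with tanh(x) for these non-negative inputs (tanh 1 = 0.761594, tanh 2 = 0.964028, and so on), which is consistent with a rectified tanh defined as max(0, tanh x). A small check of the forward values under that assumed definition:

```cpp
#include <cmath>
#include <cstdio>

// Assumed definition: rectifiedtanh(x) = max(0, tanh(x)). The printed values reproduce
// the expected buffer of rectifiedtanh_test1 for inputs 0..7.
int main() {
    for (int x = 0; x < 8; x++)
        std::printf("%f\n", std::fmax(0.0, std::tanh((double) x)));
    return 0;
}
```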
result->at(1); +// z0->printShapeInfo("OUtput RealDiv BP0 shape"); +// z1->printShapeInfo("OUtput RealDiv BP1 shape"); +// z0->printIndexedBuffer("OUtput RealDiv BP0"); +// z1->printIndexedBuffer("OUtput RealDiv BP1"); +// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e0.equalsTo(z0)); + ASSERT_TRUE(e1.equalsTo(z1)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, ShapesOf_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); +// NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e = NDArrayFactory::create({1, 2, 1}); + + nd4j::ops::shapes_of op; + auto result = op.execute({&x}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput RealDiv"); +// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests7, ShapesOf_2) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {1, 2}, {1,2}); + NDArray e0 = NDArrayFactory::create({1, 2, 1}); + NDArray e1 = NDArrayFactory::create({1, 2}); + + nd4j::ops::shapes_of op; + auto result = op.execute({&x, &y}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z0 = result->at(0); + auto z1 = result->at(1); +// z0->printIndexedBuffer("OUtput shapes2"); +// z1->printIndexedBuffer("OUtput shapes2"); +// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e0.equalsTo(z0)); + ASSERT_TRUE(e1.equalsTo(z1)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Size_1) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); + NDArray e = NDArrayFactory::create(2); + + nd4j::ops::size op; + auto result = op.execute({&x}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput SIZE"); +/// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Size_2) { + + NDArray x = NDArrayFactory::create('c', {1, 2, 1}, {2, 4}); + NDArray y = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); + NDArray e = NDArrayFactory::create(10); + + nd4j::ops::size op; + auto result = op.execute({&y}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput SIZE"); +/// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Softplus_1) { + + NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); + NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 10.000046, 10.000046, 11.000016}); + + nd4j::ops::softplus op; + auto result = op.execute({&x}, {}, {}); + + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); +// z->printIndexedBuffer("OUtput Softplus"); +/// ASSERT_TRUE(e.isSameShape(z)); + ASSERT_TRUE(e.equalsTo(*z)); + + delete result; +} + +TEST_F(DeclarableOpsTests7, Softplus_BP_1) { + + NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11}); +// NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 
+TEST_F(DeclarableOpsTests7, Softplus_BP_1) {
+
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+//    NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 10.000046, 10.000046, 11.000016});
+    NDArray eps = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,6,7,8, 9, 10});
+    nd4j::ops::softplus ffOP;
+    nd4j::ops::softplus_bp bpOp;
+    const OpArgsHolder argsHolderFF({&x}, {}, {});
+    const OpArgsHolder argsHolderBP({&x, &eps}, {}, {});
+
+    bool gradOK = GradCheck::checkGrad(ffOP, bpOp, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(gradOK);
+//
+//    auto z = result->at(0);
+//    z->printIndexedBuffer("Output Softplus");
+/////    ASSERT_TRUE(e.isSameShape(z));
+//    ASSERT_TRUE(e.equalsTo(*z));
+//
+//    delete result;
+}
+
+TEST_F(DeclarableOpsTests7, Softsign_1) {
+
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray e = NDArrayFactory::create('c', {5, 2}, {0.5, 0.6666667, 0.75, 0.8, 0.8333333, 0.875, 0.9, 0.90909094, 0.90909094, 0.9166667});
+
+    nd4j::ops::softsign op;
+    auto result = op.execute({&x}, {}, {});
+
+    ASSERT_EQ(Status::OK(), result->status());
+
+    auto z = result->at(0);
+//    z->printIndexedBuffer("Output Softsign");
+///    ASSERT_TRUE(e.isSameShape(z));
+    ASSERT_TRUE(e.equalsTo(*z));
+
+    delete result;
+}
+
+TEST_F(DeclarableOpsTests7, Softsign_BP_1) {
+
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+//    NDArray e = NDArrayFactory::create('c', {5, 2}, {1.3132616, 2.126928, 3.0485873, 4.01815, 5.0067153, 7.0009117, 9.000123, 10.000046, 10.000046, 11.000016});
+    NDArray eps = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,6,7,8, 9, 10});
+    nd4j::ops::softsign ffOP;
+    nd4j::ops::softsign_bp bpOp;
+    const OpArgsHolder argsHolderFF({&x}, {}, {});
+    const OpArgsHolder argsHolderBP({&x, &eps}, {}, {});
+
+    bool gradOK = GradCheck::checkGrad(ffOP, bpOp, argsHolderFF, argsHolderBP);
+
+    ASSERT_TRUE(gradOK);
+}
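+// Closed forms behind the expected buffers in the softplus/softsign tests above, assumed
+// from the values rather than from the op implementations: softplus(x) = ln(1 + e^x),
+// e.g. softplus(1) ~ 1.3132616; softsign(x) = x / (1 + |x|), e.g. softsign(1) = 0.5, with
+// softsign'(x) = 1 / (1 + |x|)^2. The *_BP_1 tests do not hard-code expected gradients:
+// GradCheck::checkGrad compares the bp op against a numerical gradient of the forward op.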
+
 ////////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests7, fill_test2) {
@@ -3644,6 +3928,185 @@ TEST_F(DeclarableOpsTests7, fill_test3) {
     delete result;
 }
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, ToggleBits_test1) {
+
+    auto x = NDArrayFactory::create('c', {2}, {2, 2});
+    auto exp = NDArrayFactory::create('c', {2}, {-3, -3});
+
+    nd4j::ops::toggle_bits op;
+    auto result = op.execute({&x}, {}, {}, {}, false, nd4j::DataType::INT32);
+    auto output = result->at(0);
+
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp.isSameShape(output));
+    ASSERT_TRUE(exp.equalsTo(output));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, ToggleBits_test2) {
+
+    auto x = NDArrayFactory::create('c', {2}, {2, 2});
+    auto y = NDArrayFactory::create('c', {2}, {1, 1});
+    auto exp0 = NDArrayFactory::create('c', {2}, {-3, -3});
+    auto exp1 = NDArrayFactory::create('c', {2}, {-2, -2});
+
+    nd4j::ops::toggle_bits op;
+    auto result = op.execute({&x, &y}, {}, {}, {}, false, nd4j::DataType::INT32);
+    auto output = result->at(0);
+    auto z = result->at(1);
+
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp0.isSameShape(output));
+    ASSERT_TRUE(exp0.equalsTo(output));
+    ASSERT_TRUE(exp1.isSameShape(z));
+    ASSERT_TRUE(exp1.equalsTo(z));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, Truncatediv_test1) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray y = NDArrayFactory::create('c', {5, 2}, {2,2,2,2,2,2,2,2, 2, 2});
+    NDArray exp = NDArrayFactory::create('c', {5, 2}, {0.5, 1., 1.5, 2., 2.5, 3.5, 4.5, 5., 5., 5.5});
+
+    nd4j::ops::truncatediv op;
+    auto result = op.execute({&x, &y}, {}, {});
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+    auto output = result->at(0);
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp.isSameShape(output));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, Truncatediv_test2) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray y = NDArrayFactory::create('c', {1, 2}, {2,2});
+    NDArray exp = NDArrayFactory::create('c', {5, 2}, {0.5, 1., 1.5, 2., 2.5, 3.5, 4.5, 5., 5., 5.5});
+
+    nd4j::ops::truncatediv op;
+    auto result = op.execute({&x, &y}, {}, {});
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+    auto output = result->at(0);
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp.isSameShape(output));
+
+    delete result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test1) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expI = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expL = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expF = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expF16 = NDArrayFactory::create('c', {5, 2}, {1.f,2.f,3.f,4.f,5.f,7.f,9.f,10.f, 10.f, 11.f});
+
+    nd4j::ops::to_int32 op32;
+    nd4j::ops::to_int64 op64;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result64 = op64.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result64->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_I");
+    auto out2 = result64->at(0);
+//    out2->printIndexedBuffer("OUT_L");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(expI.equalsTo(out1));
+    ASSERT_TRUE(expL.equalsTo(out2));
+
+    delete result32;
+    delete result64;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test2) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expF = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray expH = NDArrayFactory::create('c', {5, 2}, {1.f,2.f,3.f,4.f,5.f,7.f,9.f,10.f, 10.f, 11.f});
+
+    nd4j::ops::to_float32 op32;
+    nd4j::ops::to_float16 op16;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result16 = op16.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result16->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_F");
+    auto out2 = result16->at(0);
+//    out2->printIndexedBuffer("OUT_H");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(expF.equalsTo(out1));
+    ASSERT_TRUE(expH.equalsTo(out2));
+
+    delete result32;
+    delete result16;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test3) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp32 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp64 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+
+    nd4j::ops::to_uint32 op32;
+    nd4j::ops::to_uint64 op64;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result64 = op64.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result64->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_U32");
+    auto out2 = result64->at(0);
+//    out2->printIndexedBuffer("OUT_U64");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp32.equalsTo(out1));
+    ASSERT_TRUE(exp64.equalsTo(out2));
+
+    delete result32;
+    delete result64;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_F(DeclarableOpsTests7, TypesConversion_test4) {
+    NDArray x = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp32 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+    NDArray exp64 = NDArrayFactory::create('c', {5, 2}, {1,2,3,4,5,7,9,10, 10, 11});
+
+    nd4j::ops::to_float32 op32;
+    nd4j::ops::to_double op64;
+    auto result32 = op32.execute({&x}, {}, {});
+    auto result64 = op64.execute({&x}, {}, {});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result32->status());
+    ASSERT_EQ(ND4J_STATUS_OK, result64->status());
+    auto out1 = result32->at(0);
+//    out1->printIndexedBuffer("OUT_F");
+    auto out2 = result64->at(0);
+//    out2->printIndexedBuffer("OUT_D");
+
+//    output->printIndexedBuffer("Toggled");
+    ASSERT_TRUE(exp32.equalsTo(out1));
+    ASSERT_TRUE(exp64.equalsTo(out2));
+
+    delete result32;
+    delete result64;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests7, mirrorPad_test1) {
diff --git a/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp b/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp
index ac778d971..2890c9012 100644
--- a/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/ListOperationsTests.cpp
@@ -78,6 +78,71 @@ TEST_F(ListOperationsTests, BasicTest_Stack_1) {
     delete tads;
 }
+TEST_F(ListOperationsTests, BasicTest_UnStackList_1) {
+    NDArrayList list(0, true);
+    auto x = NDArrayFactory::create('c', {10, 100});
+    auto tads = x.allTensorsAlongDimension({1});
+    for (int e = 0; e < 10; e++) {
+        auto row = NDArrayFactory::create_('c', {100});
+        row->assign((double) e);
+        //list.write(e, row);
+        tads->at(e)->assign(row);
+        delete row;
+    }
+
+    nd4j::ops::unstack_list op;
+
+    auto result = op.execute(&list, {&x}, {}, {0});
+
+    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+    ASSERT_EQ(list.elements(), 10);
+
+//    auto z = result->at(0);
+//    z->printShapeInfo("The first of");
+//    ASSERT_TRUE(exp.isSameShape(z));
+//    ASSERT_TRUE(exp.equalsTo(z));
+    for (int e = 0; e < 10; e++) {
+        auto row = list.read(e);
+        ASSERT_TRUE(row->equalsTo(tads->at(e)));
+        //list.write(e, row);
+    }
+
+    delete result;
+    delete tads;
+}
+
+//TEST_F(ListOperationsTests, BasicTest_UnStackList_2) {
+////    NDArrayList list(0, true);
+//    auto x = NDArrayFactory::create('c', {10, 100});
+//    auto tads = x.allTensorsAlongDimension({1});
+//    for (int e = 0; e < 10; e++) {
+//        auto row = NDArrayFactory::create_('c', {100});
+//        row->assign((double) e);
+//        //list.write(e, row);
+//        tads->at(e)->assign(row);
+//        delete row;
+//    }
+//
+//    nd4j::ops::unstack_list op;
+//
+//    auto result = op.execute(nullptr, {&x}, {}, {0});
+//
+//    ASSERT_EQ(ND4J_STATUS_OK, result->status());
+//    ASSERT_EQ(result->size(), 10);
+//
+//    // auto z = result->at(0);
+////    z->printShapeInfo("The first of");
+////    ASSERT_TRUE(exp.isSameShape(z));
+////    ASSERT_TRUE(exp.equalsTo(z));
+//    for (int e = 0; e < 10; e++) {
+//        auto row = result->at(e);
+//        ASSERT_TRUE(row->equalsTo(tads->at(e)));
+//        //list.write(e, row);
+//    }
+//
+//    delete result;
+//    delete tads;
+//}
 TEST_F(ListOperationsTests, BasicTest_Read_1) {
     NDArrayList list(10);