From 70af8c2afc646a8ef1780ffb43bdf06f978a9fb9 Mon Sep 17 00:00:00 2001 From: Yurii Shyrma Date: Wed, 28 Aug 2019 18:27:08 +0300 Subject: [PATCH] Shyrma svd (#191) * - add one additional test for svd * - provide float argument in eye op to be a type of output array Signed-off-by: Yurii * - add cuda capability check to mmulHelper Signed-off-by: Yurii * - make use another method for divice id evaluation Signed-off-by: Yurii * Eye data type as T argument Signed-off-by: raver119 --- .../include/helpers/cuda_off/MmulHelper.cu | 11 +++++----- .../ops/declarable/generic/transforms/eye.cpp | 17 +++++++-------- .../ops/declarable/headers/transforms.h | 21 +++++++++++-------- .../layers_tests/CudaBasicsTests2.cu | 1 + .../layers_tests/DeclarableOpsTests3.cpp | 20 ++++++++++++++---- .../layers_tests/DeclarableOpsTests5.cpp | 8 +++---- .../nd4j/linalg/api/ops/impl/shape/Eye.java | 5 +++++ 7 files changed, 52 insertions(+), 31 deletions(-) diff --git a/libnd4j/include/helpers/cuda_off/MmulHelper.cu b/libnd4j/include/helpers/cuda_off/MmulHelper.cu index 19e0d5baf..32394f705 100644 --- a/libnd4j/include/helpers/cuda_off/MmulHelper.cu +++ b/libnd4j/include/helpers/cuda_off/MmulHelper.cu @@ -218,6 +218,9 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou const bool AB(aType == bType), AC(aType == cType), ABC(AB && AC); + const int deviceId = AffinityManager::currentDeviceId(); + const int major = Environment::getInstance()->capabilities()[deviceId].first(); + NDArray::prepareSpecialUse({pC}, {pA, pB}); // choose appropriate cuda gemm api depending on data types @@ -228,20 +231,18 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou float alphaF(alpha), betaF(beta); status = cublasSgemm(*handle, transAblas, transBblas, M, N, K, &alphaF, (float*)pA->getSpecialBuffer(), lda, (float*)pB->getSpecialBuffer(), ldb, &betaF, (float*)pC->getSpecialBuffer(), ldc); } -#if __CUDA_ARCH__ >= 530 - else if(ABC && aType == DataType::HALF) { + else if(ABC && aType == DataType::HALF && major >= 6) { float16 alphaH(alpha), betaH(beta); status = cublasHgemm(*handle, transAblas, transBblas, M, N, K, &alphaH.data, (__half*)pA->getSpecialBuffer(), lda, (__half*)pB->getSpecialBuffer(), ldb, &betaH.data, (__half*)pC->getSpecialBuffer(), ldc); } - else if(AB && aType == DataType::INT8 && cType == DataType::FLOAT32) { + else if(AB && aType == DataType::INT8 && cType == DataType::FLOAT32 && major >= 6) { float alphaF(alpha), betaF(beta); status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_8I, lda, pB->getSpecialBuffer(), CUDA_R_8I, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); } - else if(AB && aType == DataType::HALF && cType == DataType::FLOAT32) { + else if(AB && aType == DataType::HALF && cType == DataType::FLOAT32 && major >= 6) { float alphaF(alpha), betaF(beta); status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_16F, lda, pB->getSpecialBuffer(), CUDA_R_16F, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); } -#endif else { dim3 threadsPerBlock(N, M); dim3 blocksPerGrid(1, 1); diff --git a/libnd4j/include/ops/declarable/generic/transforms/eye.cpp b/libnd4j/include/ops/declarable/generic/transforms/eye.cpp index ef1d8e1e5..c5f3dbff6 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/eye.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/eye.cpp @@ -27,7 +27,7 @@ namespace nd4j { namespace ops { - CUSTOM_OP_IMPL(eye, -2, 1, false, 0, -2) { + CUSTOM_OP_IMPL(eye, -2, 1, false, -2, -2) { helpers::eye(block.launchContext(), *OUTPUT_VARIABLE(0)); @@ -44,8 +44,7 @@ namespace ops { std::vector params; - // FIX ME: original has a dtype param - so should be used here instead. e.g. (DataType) INT_ARG(0); - nd4j::DataType dtype = nd4j::DataType::FLOAT32; + nd4j::DataType dtype = block.getTArguments()->empty() ? nd4j::DataType::FLOAT32 : nd4j::DataTypeUtils::fromInt(T_ARG(0)); if(block.width() == 0) { params = *block.getIArguments(); @@ -54,27 +53,27 @@ namespace ops { for (int i = 0; i < block.width(); i++) { auto input = INPUT_VARIABLE(i); REQUIRE_TRUE(input->rankOf() == 1, 0, "Inputs to eye should be 1D"); - for (int e = 0; e < input->lengthOf(); e++) { + + for (int e = 0; e < input->lengthOf(); e++) params.emplace_back(input->e(e)); - } } } - REQUIRE_TRUE(params.size() > 0, 0, "Size not provided for eye op."); + REQUIRE_TRUE(params.size() > 0, 0, "Size is not provided for eye op."); const bool ordered = (params[0] == -99 || params[0] == -102); // -99 :'c', -102 : 'f' if (!ordered) params.insert(params.begin(), -99); - REQUIRE_TRUE(params.size() > 1, 0, "Size not provided for eye op."); + REQUIRE_TRUE(params.size() > 1, 0, "Size is not provided for eye op."); Nd4jLong* outShapeInfo(nullptr); const int size = params.size(); switch(size) { - + case 2: ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); outShapeInfo[0] = 2; @@ -99,7 +98,7 @@ namespace ops { outShapeInfo[i] = params[i+2]; break; } - + shape::updateStrides(outShapeInfo, static_cast(-params[0])); auto result = ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo, dtype)); RELEASE(outShapeInfo, block.getWorkspace()); diff --git a/libnd4j/include/ops/declarable/headers/transforms.h b/libnd4j/include/ops/declarable/headers/transforms.h index 75715f78e..b6fd57112 100644 --- a/libnd4j/include/ops/declarable/headers/transforms.h +++ b/libnd4j/include/ops/declarable/headers/transforms.h @@ -111,18 +111,21 @@ namespace nd4j { /** * creates identity 2D matrix or batch of identical 2D identity matrices - * + * * Input array: * provide some array - in any case operation simply neglects it - * + * + * Input float argument (if passed): + * TArgs[0] - type of elements of output array, default value is 5 (float) + * * Input integer arguments: * IArgs[0] - order of output identity matrix, 99 -> 'c'-order, 102 -> 'f'-order * IArgs[1] - the number of rows in output inner-most 2D identity matrix * IArgs[2] - optional, the number of columns in output inner-most 2D identity matrix, if this argument is not provided then it is taken to be equal to number of rows - * IArgs[3,4,...] - optional, shape of batch, output matrix will have leading batch dimensions of this shape + * IArgs[3,4,...] - optional, shape of batch, output matrix will have leading batch dimensions of this shape */ #if NOT_EXCLUDED(OP_eye) - DECLARE_CUSTOM_OP(eye, -2, 1, false, 0, 2); + DECLARE_CUSTOM_OP(eye, -2, 1, false, -2, 2); #endif #if NOT_EXCLUDED(OP_gather_nd) @@ -143,10 +146,10 @@ namespace nd4j { /** * clip a list of given tensors with given average norm when needed - * + * * Input: * a list of tensors (at least one) - * + * * Input floating point argument: * clip_norm - a value that used as threshold value and norm to be used * @@ -182,12 +185,12 @@ namespace nd4j { /** * returns histogram (as 1D array) with fixed bins width - * + * * Input arrays: - * - input array with elements to be binned into output histogram + * - input array with elements to be binned into output histogram * - range array with first element being bottom limit and second element being top limit of histogram, please note that input_value <= range[0] will be mapped to histogram[0], input_value >= range[1] will be mapped to histogram[-1] - * + * * Input integer arguments: * nbins (optional) - number of histogram bins, default value is 100 */ diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu index 19f107ea4..027dcdd42 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests2.cu @@ -272,6 +272,7 @@ TEST_F(CudaBasicsTests2, mmulMxM_12) { NDArray exp('f', {M,N}, {-16., -22., -23., -25., 30., -12., -38., -70., 20., 16., 18., 18., 22., -8., -28., -52.}, nd4j::DataType::FLOAT32); nd4j::MmulHelper::mmul(&a, &b, &c, 1., 0.); + // c.printBuffer(); ASSERT_TRUE(c.equalsTo(&exp)); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp index d786a68cb..3b4ff6cd0 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests3.cpp @@ -2757,10 +2757,18 @@ TEST_F(DeclarableOpsTests3, svd_test10) { /////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests3, svd_test11) { - auto x = NDArrayFactory::create('c', {3,3}, {1.,2.,3.,4.,5.,6.,7.,8.,9.}); - auto expS = NDArrayFactory::create('c', {3}); - auto expU = NDArrayFactory::create('c', {3,3}); - auto expV = NDArrayFactory::create('c', {3,3}); + NDArray x('c', {2,2,3,3}, {0.7788, 0.8012, 0.7244, 0.2309, 0.7271, 0.1804, 0.5056, 0.8925, 0.5461, 0.9234, 0.0856, 0.7938, 0.6591, 0.5555, + 0.1596, 0.3087, 0.1548, 0.4695, 0.7788, 0.8012, 0.7244, 0.2309, 0.7271, 0.1804, 0.5056, 0.8925, -0.5461, 0.9234, + 0.0856, -0.7938, 0.6591, 0.5555, 0.1500, 0.3087, 0.1548, 0.4695}); + NDArray expS('c', {2,2,3}, {1.89671, 0.37095, 0.05525,1.51296, 0.52741, 0.17622, 1.69095, 0.90438, 0.24688,1.33551, 0.87475, 0.21571}); + NDArray expU('c', {2,2,3,3}, {6.9205e-01, 6.0147e-01, -3.9914e-01, 3.8423e-01, -7.7503e-01, -5.0170e-01, 6.1110e-01, -1.9384e-01, 7.6746e-01, + 7.8967e-01, 4.5442e-01, -4.1222e-01, 4.9381e-01, -8.6948e-01, -1.2540e-02, 3.6412e-01, 1.9366e-01, 9.1100e-01, + 7.1764e-01, 5.9844e-01, 3.5617e-01, 4.4477e-01, -3.1000e-04, -8.9564e-01, 5.3588e-01, -8.0116e-01, 2.6639e-01, + 8.7050e-01, -4.2088e-01, -2.5513e-01, 4.8622e-01, 6.5499e-01, 5.7843e-01, 7.6340e-02, 6.2757e-01, -7.7481e-01}); + NDArray expV('c', {2,2,3,3}, {0.49383, 0.51614, -0.69981, 0.72718, -0.68641, 0.00688, 0.4768 , 0.51228, 0.7143 , 0.77137, -0.17763, + -0.6111 , 0.26324, -0.7852 , 0.56051, 0.57939, 0.59322, 0.55892, 0.55149, 0.06737, 0.83146, 0.81413, + -0.26072, -0.51887, 0.18182, 0.96306, -0.19863, 0.85948, 0.2707 , -0.4336 , 0.26688, 0.48582, 0.83232, + -0.43596, 0.83108, -0.34531}); nd4j::ops::svd op; auto results = op.execute({&x}, {}, {0, 1, 16}); @@ -2775,6 +2783,10 @@ TEST_F(DeclarableOpsTests3, svd_test11) { ASSERT_TRUE(expU.isSameShape(u)); ASSERT_TRUE(expV.isSameShape(v)); + ASSERT_TRUE(expS.equalsTo(s)); + ASSERT_TRUE(expU.equalsTo(u)); + ASSERT_TRUE(expV.equalsTo(v)); + delete results; } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp index 2ff2fad58..3af53bad0 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp @@ -639,10 +639,10 @@ TEST_F(DeclarableOpsTests5, eye_test2) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests5, eye_test3) { - auto expected = NDArrayFactory::create('c', {2, 3, 4}, {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}); + auto expected = NDArrayFactory::create('c', {2, 3, 4}, {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}); nd4j::ops::eye op; - auto results = op.execute({}, {}, {-99, 3, 4, 2}); + auto results = op.execute({}, {9 /*int*/}, {-99, 3, 4, 2}); auto output = results->at(0); // output->printIndexedBuffer("Output eye"); @@ -656,10 +656,10 @@ TEST_F(DeclarableOpsTests5, eye_test3) { ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests5, eye_test4) { - auto expected = NDArrayFactory::create('c', {2, 2, 3, 4}, {1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.}); + auto expected = NDArrayFactory::create('c', {2, 2, 3, 4}, {1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.}); nd4j::ops::eye op; - auto results = op.execute({}, {}, {-99, 3, 4, 2, 2}); + auto results = op.execute({}, {6/*double*/}, {-99, 3, 4, 2, 2}); auto output = results->at(0); ASSERT_EQ(Status::OK(), results->status()); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Eye.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Eye.java index f01e4a448..c4747a371 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Eye.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Eye.java @@ -98,6 +98,9 @@ public class Eye extends DynamicCustomOp { } protected void addArgs() { + iArguments.clear(); + tArguments.clear(); + addIArgument(numRows); addIArgument(numCols); if(batchDimension != null) { @@ -105,6 +108,8 @@ public class Eye extends DynamicCustomOp { addIArgument(dim); } } + + addTArgument((double) dataType.toInt()); } @Override