From 0748c7e7c2d05e932b5053d11933438d512738f2 Mon Sep 17 00:00:00 2001 From: Oleh Date: Fri, 21 Feb 2020 06:46:05 +0200 Subject: [PATCH] Oleh broadcast4d (#257) * libnd4j raw implementation of native broadcast for special cases Signed-off-by: Oleg * libnd4j fixed bugs for special case of 4D loop broadcast, add some tests, need more testing and discussion Signed-off-by: Oleg * libnd4j added 3D and 5D cases support and tests, need testing with different orders Signed-off-by: Oleg * libnd4j corrected case selection for broadcast 3,4,5D loops, fixed several places for more stable behavior, clean up Signed-off-by: Oleg * libnd4j minor corrections to avoid some risks in strides selection, added tests and renamed some variables Signed-off-by: Oleg * libnd4j optimize usage of the stride selection for all loops in separate ShapeUtils method copyCertainStridesFromShapeInfo, merge master Signed-off-by: Oleg * libnd4j remove per request several tests for 3D, 4D and 5D broadcast loops Signed-off-by: Oleg * libnd4j removed some local changes that had not been synced with serve playground, turn on new loops usage --- libnd4j/blas/cpu/NativeOpExecutioner.cpp | 12 + libnd4j/include/helpers/LoopKind.h | 21 +- libnd4j/include/helpers/ShapeUtils.h | 11 + libnd4j/include/helpers/impl/ShapeUtils.cpp | 23 ++ libnd4j/include/loops/cpu/broadcasting.hpp | 107 ++++++++- .../layers_tests/DeclarableOpsTests14.cpp | 219 ++++++++++++++++-- 6 files changed, 378 insertions(+), 15 deletions(-) diff --git a/libnd4j/blas/cpu/NativeOpExecutioner.cpp b/libnd4j/blas/cpu/NativeOpExecutioner.cpp index cbc224838..1fedb0241 100644 --- a/libnd4j/blas/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/blas/cpu/NativeOpExecutioner.cpp @@ -180,6 +180,18 @@ void NativeOpExecutioner::execBroadcast(nd4j::LaunchContext *lc, numTads = shape::length(hYShapeInfo); } break; + case nd4j::LoopKind::BROADCAST_3D: { + numTads = shape::sizeAt(hZShapeInfo, 0); + } + break; + case nd4j::LoopKind::BROADCAST_4D: { + numTads = 
shape::sizeAt(hZShapeInfo, 0) * shape::sizeAt(hZShapeInfo, 1); + } + break; + case nd4j::LoopKind::BROADCAST_5D: { + numTads = shape::sizeAt(hZShapeInfo, 0) * shape::sizeAt(hZShapeInfo, 1); + } + break; default: { auto xLen = shape::length(hXShapeInfo); auto yLen = shape::length(hYShapeInfo); diff --git a/libnd4j/include/helpers/LoopKind.h b/libnd4j/include/helpers/LoopKind.h index ddd1c95e5..d97f3b225 100644 --- a/libnd4j/include/helpers/LoopKind.h +++ b/libnd4j/include/helpers/LoopKind.h @@ -37,7 +37,7 @@ namespace nd4j { class ND4J_EXPORT LoopKind { public: - enum Kind {SMALLARR2DX, EWS1, EWSNONZERO, RANK1, RANK2, RANK3, RANK4, RANK5, X_EWSNONZERO, Y_EWSNONZERO, Z_EWSNONZERO, COMMON, BROADCAST_SCALAR_X, BROADCAST_SCALAR_Y}; + enum Kind { SMALLARR2DX, EWS1, EWSNONZERO, RANK1, RANK2, RANK3, RANK4, RANK5, X_EWSNONZERO, Y_EWSNONZERO, Z_EWSNONZERO, COMMON, BROADCAST_SCALAR_X, BROADCAST_SCALAR_Y, BROADCAST_3D, BROADCAST_4D, BROADCAST_5D }; static FORCEINLINE Kind deduceKindOfLoopXZ(const Nd4jLong* xShapeInfo, const Nd4jLong* zShapeInfo); static FORCEINLINE Kind deduceKindOfLoopXYZ(const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, const Nd4jLong* zShapeInfo); @@ -96,6 +96,25 @@ LoopKind::Kind LoopKind::deduceKindOfLoopBroadcast(const Nd4jLong* xShapeInfo, c auto yEws = shape::elementWiseStride(yShapeInfo); auto zEws = shape::elementWiseStride(zShapeInfo); + bool bNDLoopsRanks = (xRank == zRank && yRank <= xRank && yRank >= 2); + + int countUnityDimsInY = 0, countUnityDimsInX = 0; + for (int i = 0; i < xRank; i++) { + if (i < yRank) + countUnityDimsInY += (1 == shape::sizeAt(yShapeInfo, i)) ? 1 : 0; + countUnityDimsInX += (1 == shape::sizeAt(xShapeInfo, i)) ? 
1 : 0; + } + + bool bNotCommonVectorCase = (countUnityDimsInY != yRank - 1) && (countUnityDimsInX != xRank - 1); + + if (3 == xRank && bNDLoopsRanks && bNotCommonVectorCase) + return nd4j::LoopKind::BROADCAST_3D; + if (4 == xRank && bNDLoopsRanks && bNotCommonVectorCase) + return nd4j::LoopKind::BROADCAST_4D; + if (5 == xRank && bNDLoopsRanks && bNotCommonVectorCase) + return nd4j::LoopKind::BROADCAST_5D; + + if (xRank == yRank && xRank == zRank && xOrder == 'c' && yOrder == 'c' && zOrder == 'c' && xEws == 1 && yEws == 1 && zEws == 1 && xRank >= 2) { // we validate that shapes are equal till the last dim for (int e = 0; e < xRank - 1; e++) { diff --git a/libnd4j/include/helpers/ShapeUtils.h b/libnd4j/include/helpers/ShapeUtils.h index ebd61410b..39ea3edaa 100644 --- a/libnd4j/include/helpers/ShapeUtils.h +++ b/libnd4j/include/helpers/ShapeUtils.h @@ -180,6 +180,17 @@ namespace nd4j { return (numStrings + 1) * sizeof(Nd4jLong); } + /** + * This method selects strides based on dimensions required for broadcasting + * @param const pointer to input (Y) shape info for strides selection + * @param rank of input (X) to broadcasting + * @param dimensions size + * @param const pointer to dimensions for broadcasting + * @param pointer to output strides; the buffer has to be pre-allocated and zero-filled + * @return + */ + static void copyCertainStridesFromShapeInfo(const Nd4jLong* inShapeInfo, const int nRank, const int dimsSize, const int* dims, Nd4jLong* outStrides); + /* * check whether arr1/arr2 is sub-array of arr2/arr1, * this method do not evaluate what array is sub-array, it returns true if arr1 is sub-array of arr2 or arr2 is sub-array of arr1 diff --git a/libnd4j/include/helpers/impl/ShapeUtils.cpp b/libnd4j/include/helpers/impl/ShapeUtils.cpp index a2d3f97ef..10babeae1 100644 --- a/libnd4j/include/helpers/impl/ShapeUtils.cpp +++ b/libnd4j/include/helpers/impl/ShapeUtils.cpp @@ -1057,6 +1057,29 @@ std::vector ShapeUtils::tadAxesForSimpleBroadcast(const NDArray& max, const return 
numOfMinTads == 1 ? maxTadDims : std::vector(); } +void ShapeUtils::copyCertainStridesFromShapeInfo(const Nd4jLong* inShapeInfo, const int nRank, const int dimsSize, const int* dims, Nd4jLong* outStrides) { + + int yRank = shape::rank(inShapeInfo); + auto yOrigStride = shape::stride(inShapeInfo); + + if (yRank == nRank) { + for (int i = 0; i < yRank; ++i) { + // x[2,3,4] * y[2,1,4] = z[2,3,4] + outStrides[i] = (1 == shape::sizeAt(inShapeInfo, i)) ? 0 : yOrigStride[i]; + } + } + else { + + auto dimEx = nd4j::ShapeUtils::evalDimsToExclude(nRank, dimsSize, dims); + + for (int i = 0, it = 0; i < nRank; ++i) { + auto nCount = std::count(dimEx.cbegin(), dimEx.cend(), i); + outStrides[i] = (0 == nCount) ? yOrigStride[it++] : 0; + if (it == yRank) + break; + } + } +} //////////////////////////////////////////////////////////////////////////////// /* bool ShapeUtils::isSubArrayCase(const NDArray& arr1, const NDArray& arr2, std::vector& sameDims) { diff --git a/libnd4j/include/loops/cpu/broadcasting.hpp b/libnd4j/include/loops/cpu/broadcasting.hpp index 691b95b83..62058bd20 100644 --- a/libnd4j/include/loops/cpu/broadcasting.hpp +++ b/libnd4j/include/loops/cpu/broadcasting.hpp @@ -25,6 +25,7 @@ #include #include #include +#include using namespace simdOps; @@ -144,7 +145,14 @@ namespace functions { auto yEws = shape::elementWiseStride(yShapeInfo); auto zEws = shape::elementWiseStride(zTadShapeInfo); - const nd4j::LoopKind::Kind kindOfLoop = loopKind == nd4j::LoopKind::BROADCAST_SCALAR_X || loopKind == nd4j::LoopKind::BROADCAST_SCALAR_Y ? loopKind : nd4j::LoopKind::deduceKindOfLoopXYZ(xTadShapeShapeInfo, yShapeInfo, zTadShapeInfo); + + const nd4j::LoopKind::Kind kindOfLoop = + (loopKind == nd4j::LoopKind::BROADCAST_SCALAR_X || + loopKind == nd4j::LoopKind::BROADCAST_SCALAR_Y || + loopKind == nd4j::LoopKind::BROADCAST_3D || + loopKind == nd4j::LoopKind::BROADCAST_4D || + loopKind == nd4j::LoopKind::BROADCAST_5D) + ? 
loopKind : nd4j::LoopKind::deduceKindOfLoopXYZ(xTadShapeShapeInfo, yShapeInfo, zTadShapeInfo); if (kindOfLoop == nd4j::LoopKind::EWS1) { for (auto i = start; i < stop; i++) { @@ -194,6 +202,103 @@ namespace functions { oZ[f] = OpType::op(oX[f], oY); } } + else if (kindOfLoop == nd4j::LoopKind::BROADCAST_3D) { + + int xRank = shape::rank(xShapeInfo); + int yRank = shape::rank(yShapeInfo); + + auto xStrides = shape::stride(xShapeInfo); + auto zStrides = shape::stride(zShapeInfo); + + Nd4jLong yStrides[3] = { 0,0,0 }; + nd4j::ShapeUtils::copyCertainStridesFromShapeInfo(yShapeInfo, xRank, dimensionLength, dimension, yStrides); + + uint32_t nSize1 = shape::sizeAt(zShapeInfo, 1); + uint32_t nSize2 = shape::sizeAt(zShapeInfo, 2); + + for (uint32_t index0 = start; index0 < stop; index0++) { + + PRAGMA_OMP_SIMD + for (uint32_t index1 = 0; index1 < nSize1; index1++) { + for (uint32_t index2 = 0; index2 < nSize2; index2++) { + auto rX = x + (xStrides[0] * index0 + xStrides[1] * index1 + xStrides[2] * index2); + auto rY = y + (yStrides[0] * index0 + yStrides[1] * index1 + yStrides[2] * index2); + auto rZ = z + (zStrides[0] * index0 + zStrides[1] * index1 + zStrides[2] * index2); + *rZ = OpType::op(*rX, *rY); + } + } + + } + + } + else if (kindOfLoop == nd4j::LoopKind::BROADCAST_4D) { + + int xRank = shape::rank(xShapeInfo); + int yRank = shape::rank(yShapeInfo); + + auto xStrides = shape::stride(xShapeInfo); + auto zStrides = shape::stride(zShapeInfo); + + Nd4jLong yStrides[4] = { 0,0,0,0 }; + nd4j::ShapeUtils::copyCertainStridesFromShapeInfo(yShapeInfo, xRank, dimensionLength, dimension, yStrides); + + uint32_t nSize1 = shape::sizeAt(zShapeInfo, 1); + uint32_t nSize2 = shape::sizeAt(zShapeInfo, 2); + uint32_t nSize3 = shape::sizeAt(zShapeInfo, 3); + + for (uint32_t i = start; i < stop; i++) { + + uint32_t index0 = i / nSize1; + uint32_t index1 = i % nSize1; + + PRAGMA_OMP_SIMD + for (uint32_t index2 = 0; index2 < nSize2; index2++) { + for (uint32_t index3 = 0; index3 < 
nSize3; index3++) { + auto rX = x + (xStrides[0] * index0 + xStrides[1] * index1 + xStrides[2] * index2 + xStrides[3] * index3); + auto rY = y + (yStrides[0] * index0 + yStrides[1] * index1 + yStrides[2] * index2 + yStrides[3] * index3); + auto rZ = z + (zStrides[0] * index0 + zStrides[1] * index1 + zStrides[2] * index2 + zStrides[3] * index3); + *rZ = OpType::op(*rX, *rY); + } + } + } + + } + else if (kindOfLoop == nd4j::LoopKind::BROADCAST_5D) { + + int xRank = shape::rank(xShapeInfo); + int yRank = shape::rank(yShapeInfo); + + auto xStrides = shape::stride(xShapeInfo); + auto zStrides = shape::stride(zShapeInfo); + + Nd4jLong yStrides[5] = { 0,0,0,0,0 }; + nd4j::ShapeUtils::copyCertainStridesFromShapeInfo(yShapeInfo, xRank, dimensionLength, dimension, yStrides); + + uint32_t nSize1 = shape::sizeAt(zShapeInfo, 1); + uint32_t nSize2 = shape::sizeAt(zShapeInfo, 2); + uint32_t nSize3 = shape::sizeAt(zShapeInfo, 3); + uint32_t nSize4 = shape::sizeAt(zShapeInfo, 4); + + for (uint32_t i = start; i < stop; i++) { + + uint32_t index0 = i / nSize1; + uint32_t index1 = i % nSize1; + + PRAGMA_OMP_SIMD + for (uint32_t index2 = 0; index2 < nSize2; index2++) { + for (uint32_t index3 = 0; index3 < nSize3; index3++) { + for (uint32_t index4 = 0; index4 < nSize4; index4++) { + auto rX = x + (xStrides[0] * index0 + xStrides[1] * index1 + xStrides[2] * index2 + xStrides[3] * index3 + xStrides[4] * index4); + auto rY = y + (yStrides[0] * index0 + yStrides[1] * index1 + yStrides[2] * index2 + yStrides[3] * index3 + yStrides[4] * index4); + auto rZ = z + (zStrides[0] * index0 + zStrides[1] * index1 + zStrides[2] * index2 + zStrides[3] * index3 + zStrides[4] * index4); + + *rZ = OpType::op(*rX, *rY); + } + } + } + } + + } else if(shape::haveSameShapeAndStrides(xTadShapeShapeInfo, yShapeInfo) && shape::haveSameShapeAndStrides(xTadShapeShapeInfo, zTadShapeInfo)) { uint tadShapeShapeInfoCast[MAX_RANK]; bool canCastX = nd4j::DataTypeUtils::castShapeInfo(xTadShapeShapeInfo, 
tadShapeShapeInfoCast); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp index 25e2d383d..3672a4c20 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp @@ -1306,8 +1306,6 @@ TEST_F(DeclarableOpsTests14, matmul_test29) { delete results; } - - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test30) { @@ -1328,8 +1326,6 @@ TEST_F(DeclarableOpsTests14, matmul_test30) { delete results; } - - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test31) { @@ -1350,8 +1346,6 @@ TEST_F(DeclarableOpsTests14, matmul_test31) { delete results; } - - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test32) { @@ -1369,8 +1363,7 @@ TEST_F(DeclarableOpsTests14, matmul_test32) { delete results; } - - +///////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test33) { auto x = NDArrayFactory::create('c', {4, 3}); auto y = NDArrayFactory::create('c', {4, 1}); @@ -1390,8 +1383,7 @@ TEST_F(DeclarableOpsTests14, matmul_test33) { delete result; } - - +////////////////////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test34) { auto a = NDArrayFactory::create('c', {3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); auto b = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); @@ -1408,7 +1400,7 @@ TEST_F(DeclarableOpsTests14, matmul_test34) { delete result; } - +///////////////////////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test35) { auto a = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); auto b = NDArrayFactory::create('c', {4, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); @@ -1425,7 
+1417,7 @@ TEST_F(DeclarableOpsTests14, matmul_test35) { delete result; } - +//////////////////////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test36) { auto a = NDArrayFactory::create('c', {1, 4}, {1, 2, 3, 4}); auto b = NDArrayFactory::create('c', {4, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); @@ -1442,7 +1434,6 @@ TEST_F(DeclarableOpsTests14, matmul_test36) { delete result; } - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests14, matmul_test37) { @@ -1463,6 +1454,206 @@ TEST_F(DeclarableOpsTests14, matmul_test37) { ASSERT_TRUE(cExp.isSameShape(c)); ASSERT_TRUE(cExp.equalsTo(c)); } +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_3D_1) { + + // x[4, 12, 128] * y[4, 128] = z[4, 12, 128] + + auto x = NDArray('c', { 2, 3, 5 }, nd4j::DataType::FLOAT32); + auto y = NDArray('c', { 2, 5 }, nd4j::DataType::FLOAT32); + auto z = NDArray('c', { 2, 3, 5 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto e = NDArray('c', { 2, 3, 5 }, { 10.000000, 22.000000, 36.000000, 52.000000, 70.000000, 60.000000, 77.000000, 96.000000, 117.000000, 140.000000, 110.000000, 132.000000, 156.000000, 182.000000, 210.000000, 240.000000, 272.000000, 306.000000, 342.000000, 380.000000, 315.000000, 352.000000, 391.000000, 432.000000, 475.000000, 390.000000, 432.000000, 476.000000, 522.000000, 570.000000 }, nd4j::DataType::FLOAT32); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Multiply, { 0,2 }, y, z); + //z.printBuffer(); + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_3D_2) { + + auto x = NDArray('f', { 2, 3, 5 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 5 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5 }, 
nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5 }, { 0.100000, 0.181818, 0.250000, 0.307692, 0.357143, 0.600000, 0.636364, 0.666667, 0.692308, 0.714286, 1.100000, 1.090909, 1.083333, 1.076923, 1.071429, 1.066667, 1.062500, 1.058824, 1.055556, 1.052632, 1.400000, 1.375000, 1.352941, 1.333333, 1.315789, 1.733333, 1.687500, 1.647059, 1.611111, 1.578947 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5 }, nd4j::DataType::FLOAT32); + + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Divide, { 0,2 }, y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_4D_1) { + + auto x = NDArray('c', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + auto y = NDArray('c', { 2, 5, 4 }, nd4j::DataType::FLOAT32); + auto z = NDArray('c', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto e = NDArray('c', { 2, 3, 5, 4 }, { 10.000000, 22.000000, 36.000000, 52.000000, 70.000000, 90.000000, 112.000000, 136.000000, 162.000000, 190.000000, 220.000000, 252.000000, 286.000000, 322.000000, 360.000000, 400.000000, 442.000000, 486.000000, 532.000000, 580.000000, 210.000000, 242.000000, 276.000000, 312.000000, 350.000000, 390.000000, 432.000000, 476.000000, 522.000000, 570.000000, 620.000000, 672.000000, 726.000000, 782.000000, 840.000000, 900.000000, 962.000000, 1026.000000, 1092.000000, 1160.000000, 410.000000, 462.000000, 516.000000, 572.000000, 630.000000, 690.000000, 752.000000, 816.000000, 882.000000, 950.000000, 1020.000000, 1092.000000, 1166.000000, 1242.000000, 1320.000000, 1400.000000, 1482.000000, 1566.000000, 1652.000000, 1740.000000, 1830.000000, 1922.000000, 2016.000000, 2112.000000, 2210.000000, 2310.000000, 2412.000000, 2516.000000, 2622.000000, 2730.000000, 2840.000000, 2952.000000, 3066.000000, 3182.000000, 3300.000000, 3420.000000, 3542.000000, 
3666.000000, 3792.000000, 3920.000000, 2430.000000, 2542.000000, 2656.000000, 2772.000000, 2890.000000, 3010.000000, 3132.000000, 3256.000000, 3382.000000, 3510.000000, 3640.000000, 3772.000000, 3906.000000, 4042.000000, 4180.000000, 4320.000000, 4462.000000, 4606.000000, 4752.000000, 4900.000000, 3030.000000, 3162.000000, 3296.000000, 3432.000000, 3570.000000, 3710.000000, 3852.000000, 3996.000000, 4142.000000, 4290.000000, 4440.000000, 4592.000000, 4746.000000, 4902.000000, 5060.000000, 5220.000000, 5382.000000, 5546.000000, 5712.000000, 5880.000000 }, nd4j::DataType::FLOAT32); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Multiply, { 0,2,3 }, y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_4D_2) { + + auto x = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 5, 4 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5, 4 }, { 0.100000,0.181818,0.250000,0.307692,0.357143,0.400000,0.437500,0.470588,0.500000,0.526316,0.550000,0.571429, 0.590909,0.608696,0.625000,0.640000, 0.653846,0.666667,0.678571,0.689655, 2.100000,2.000000,1.916667, 1.846154, 1.785714, 1.733333,1.687500, 1.647059,1.611111, 1.578947,1.550000, 1.523810,1.500000, 1.478261,1.458333, 1.440000,1.423077, 1.407407,1.392857, 1.379310,4.100000, 3.818182,3.583333, 3.384615, 3.214286, 3.066667,2.937500, 2.823529,2.722222, 2.631579,2.550000, 2.476191,2.409091, 2.347826,2.291667, 2.240000,2.192308, 2.148148,2.107143, 2.068965,2.033333, 2.000000,1.968750, 1.939394,1.911765, 1.885714,1.861111, 1.837838,1.815789, 1.794872,1.775000, 1.756098,1.738095, 1.720930,1.704545, 1.688889,1.673913, 
1.659575,1.645833,1.632653,2.700000,2.645161,2.593750,2.545455,2.500000,2.457143,2.416667,2.378378,2.342105,2.307692,2.275000,2.243902,2.214286,2.186047,2.159091,2.133333,2.108696,2.085106,2.062500,2.040816,3.366667,3.290323,3.218750,3.151515,3.088235,3.028571,2.972222,2.918919,2.868421,2.820513,2.775000,2.731707,2.690476,2.651163,2.613636,2.577778,2.543478,2.510638,2.479167,2.448980 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Divide, { 0,2,3 }, y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_4D_3) { + + auto x = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 5 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5, 4 }, { 0.100000, 0.200000, 0.300000, 0.400000, 0.454545, 0.545455, 0.636364, 0.727273, 0.750000, 0.833333, 0.916667, 1.000000, 1.000000, 1.076923, 1.153846, 1.230769, 1.214286, 1.285714, 1.357143, 1.428571, 2.100000, 2.200000, 2.300000, 2.400000, 2.272727, 2.363636, 2.454545, 2.545455, 2.416667, 2.500000, 2.583333, 2.666667, 2.538461, 2.615385, 2.692308, 2.769231, 2.642857, 2.714286, 2.785714, 2.857143, 4.100000, 4.200000, 4.300000, 4.400000, 4.090909, 4.181818, 4.272727, 4.363636, 4.083333, 4.166667, 4.250000, 4.333333, 4.076923, 4.153846, 4.230769, 4.307693, 4.071429, 4.142857, 4.214286, 4.285714, 4.066667, 4.133333, 4.200000, 4.266667, 4.062500, 4.125000, 4.187500, 4.250000, 4.058824, 4.117647, 4.176471, 4.235294, 4.055555, 4.111111, 4.166667, 4.222222, 4.052631, 4.105263, 4.157895, 4.210526, 5.400000, 5.466667, 5.533333, 5.600000, 5.312500, 5.375000, 5.437500, 5.500000, 5.235294, 5.294117, 5.352941, 5.411765, 5.166667, 5.222222, 5.277778, 
5.333333, 5.105263, 5.157895, 5.210526, 5.263158, 6.733333, 6.800000, 6.866667, 6.933333, 6.562500, 6.625000, 6.687500, 6.750000, 6.411765, 6.470588, 6.529412, 6.588235, 6.277778, 6.333333, 6.388889, 6.444445, 6.157895, 6.210526, 6.263158, 6.315790 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Divide, { 0,2 }, y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_4D_4) { + + // x[4, 12, 128, 128] * y[4, 1, 128, 1] = z[4, 12, 128, 128] + + auto x = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 1, 5, 1 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5, 4 }, { 0.100000, 0.200000, 0.300000, 0.400000, 0.454545, 0.545455, 0.636364, 0.727273, 0.750000, 0.833333, 0.916667, 1.000000, 1.000000, 1.076923, 1.153846, 1.230769, 1.214286, 1.285714, 1.357143, 1.428571, 2.100000, 2.200000, 2.300000, 2.400000, 2.272727, 2.363636, 2.454545, 2.545455, 2.416667, 2.500000, 2.583333, 2.666667, 2.538461, 2.615385, 2.692308, 2.769231, 2.642857, 2.714286, 2.785714, 2.857143, 4.100000, 4.200000, 4.300000, 4.400000, 4.090909, 4.181818, 4.272727, 4.363636, 4.083333, 4.166667, 4.250000, 4.333333, 4.076923, 4.153846, 4.230769, 4.307693, 4.071429, 4.142857, 4.214286, 4.285714, 4.066667, 4.133333, 4.200000, 4.266667, 4.062500, 4.125000, 4.187500, 4.250000, 4.058824, 4.117647, 4.176471, 4.235294, 4.055555, 4.111111, 4.166667, 4.222222, 4.052631, 4.105263, 4.157895, 4.210526, 5.400000, 5.466667, 5.533333, 5.600000, 5.312500, 5.375000, 5.437500, 5.500000, 5.235294, 5.294117, 5.352941, 5.411765, 5.166667, 5.222222, 5.277778, 5.333333, 5.105263, 5.157895, 5.210526, 5.263158, 6.733333, 6.800000, 
6.866667, 6.933333, 6.562500, 6.625000, 6.687500, 6.750000, 6.411765, 6.470588, 6.529412, 6.588235, 6.277778, 6.333333, 6.388889, 6.444445, 6.157895, 6.210526, 6.263158, 6.315790 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5, 4 }, nd4j::DataType::FLOAT32); + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyTrueBroadcast(BroadcastOpsTuple::Divide(), y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_5D_1) { + // x[4, 12, 128, 128, 128] * y[4, 1, 128, 128, 128] = z[4, 12, 128, 128, 128] + auto x = NDArray('c', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + auto y = NDArray('c', { 2, 1, 5, 4, 3 }, nd4j::DataType::FLOAT32); + auto z = NDArray('c', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto e = NDArray('c', { 2, 3, 5, 4, 3 }, { 10.000000, 22.000000, 36.000000, 52.000000, 70.000000, 90.000000, 112.000000, 136.000000, 162.000000, 190.000000, 220.000000, 252.000000, 286.000000, 322.000000, 360.000000, 400.000000, 442.000000, 486.000000, 532.000000, 580.000000, 630.000000, 682.000000, 736.000000, 792.000000, 850.000000, 910.000000, 972.000000, 1036.000000, 1102.000000, 1170.000000, 1240.000000, 1312.000000, 1386.000000, 1462.000000, 1540.000000, 1620.000000, 1702.000000, 1786.000000, 1872.000000, 1960.000000, 2050.000000, 2142.000000, 2236.000000, 2332.000000, 2430.000000, 2530.000000, 2632.000000, 2736.000000, 2842.000000, 2950.000000, 3060.000000, 3172.000000, 3286.000000, 3402.000000, 3520.000000, 3640.000000, 3762.000000, 3886.000000, 4012.000000, 4140.000000, 610.000000, 682.000000, 756.000000, 832.000000, 910.000000, 990.000000, 1072.000000, 1156.000000, 1242.000000, 1330.000000, 1420.000000, 1512.000000, 1606.000000, 1702.000000, 1800.000000, 1900.000000, 2002.000000, 2106.000000, 2212.000000, 2320.000000, 2430.000000, 2542.000000, 2656.000000, 2772.000000, 2890.000000, 
3010.000000, 3132.000000, 3256.000000, 3382.000000, 3510.000000, 3640.000000, 3772.000000, 3906.000000, 4042.000000, 4180.000000, 4320.000000, 4462.000000, 4606.000000, 4752.000000, 4900.000000, 5050.000000, 5202.000000, 5356.000000, 5512.000000, 5670.000000, 5830.000000, 5992.000000, 6156.000000, 6322.000000, 6490.000000, 6660.000000, 6832.000000, 7006.000000, 7182.000000, 7360.000000, 7540.000000, 7722.000000, 7906.000000, 8092.000000, 8280.000000, 1210.000000, 1342.000000, 1476.000000, 1612.000000, 1750.000000, 1890.000000, 2032.000000, 2176.000000, 2322.000000, 2470.000000, 2620.000000, 2772.000000, 2926.000000, 3082.000000, 3240.000000, 3400.000000, 3562.000000, 3726.000000, 3892.000000, 4060.000000, 4230.000000, 4402.000000, 4576.000000, 4752.000000, 4930.000000, 5110.000000, 5292.000000, 5476.000000, 5662.000000, 5850.000000, 6040.000000, 6232.000000, 6426.000000, 6622.000000, 6820.000000, 7020.000000, 7222.000000, 7426.000000, 7632.000000, 7840.000000, 8050.000000, 8262.000000, 8476.000000, 8692.000000, 8910.000000, 9130.000000, 9352.000000, 9576.000000, 9802.000000, 10030.000000, 10260.000000, 10492.000000, 10726.000000, 10962.000000, 11200.000000, 11440.000000, 11682.000000, 11926.000000, 12172.000000, 12420.000000, 12670.000000, 12922.000000, 13176.000000, 13432.000000, 13690.000000, 13950.000000, 14212.000000, 14476.000000, 14742.000000, 15010.000000, 15280.000000, 15552.000000, 15826.000000, 16102.000000, 16380.000000, 16660.000000, 16942.000000, 17226.000000, 17512.000000, 17800.000000, 18090.000000, 18382.000000, 18676.000000, 18972.000000, 19270.000000, 19570.000000, 19872.000000, 20176.000000, 20482.000000, 20790.000000, 21100.000000, 21412.000000, 21726.000000, 22042.000000, 22360.000000, 22680.000000, 23002.000000, 23326.000000, 23652.000000, 23980.000000, 24310.000000, 24642.000000, 24976.000000, 25312.000000, 25650.000000, 25990.000000, 26332.000000, 26676.000000, 27022.000000, 27370.000000, 27720.000000, 28072.000000, 28426.000000, 
28782.000000, 29140.000000, 29500.000000, 29862.000000, 30226.000000, 30592.000000, 30960.000000, 16870.000000, 17182.000000, 17496.000000, 17812.000000, 18130.000000, 18450.000000, 18772.000000, 19096.000000, 19422.000000, 19750.000000, 20080.000000, 20412.000000, 20746.000000, 21082.000000, 21420.000000, 21760.000000, 22102.000000, 22446.000000, 22792.000000, 23140.000000, 23490.000000, 23842.000000, 24196.000000, 24552.000000, 24910.000000, 25270.000000, 25632.000000, 25996.000000, 26362.000000, 26730.000000, 27100.000000, 27472.000000, 27846.000000, 28222.000000, 28600.000000, 28980.000000, 29362.000000, 29746.000000, 30132.000000, 30520.000000, 30910.000000, 31302.000000, 31696.000000, 32092.000000, 32490.000000, 32890.000000, 33292.000000, 33696.000000, 34102.000000, 34510.000000, 34920.000000, 35332.000000, 35746.000000, 36162.000000, 36580.000000, 37000.000000, 37422.000000, 37846.000000, 38272.000000, 38700.000000, 21070.000000, 21442.000000, 21816.000000, 22192.000000, 22570.000000, 22950.000000, 23332.000000, 23716.000000, 24102.000000, 24490.000000, 24880.000000, 25272.000000, 25666.000000, 26062.000000, 26460.000000, 26860.000000, 27262.000000, 27666.000000, 28072.000000, 28480.000000, 28890.000000, 29302.000000, 29716.000000, 30132.000000, 30550.000000, 30970.000000, 31392.000000, 31816.000000, 32242.000000, 32670.000000, 33100.000000, 33532.000000, 33966.000000, 34402.000000, 34840.000000, 35280.000000, 35722.000000, 36166.000000, 36612.000000, 37060.000000, 37510.000000, 37962.000000, 38416.000000, 38872.000000, 39330.000000, 39790.000000, 40252.000000, 40716.000000, 41182.000000, 41650.000000, 42120.000000, 42592.000000, 43066.000000, 43542.000000, 44020.000000, 44500.000000, 44982.000000, 45466.000000, 45952.000000, 46440.000000 }, nd4j::DataType::FLOAT32); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyTrueBroadcast(BroadcastOpsTuple::Multiply(), y, z); + // z.printBuffer(); + ASSERT_EQ(e, z); +} 
+/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_5D_2) { + + auto x = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 5, 4, 3 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5, 4, 3 }, { 0.100000, 0.181818, 0.250000, 0.307692, 0.357143, 0.400000, 0.437500, 0.470588, 0.500000, 0.526316, 0.550000, 0.571429, 0.590909, 0.608696, 0.625000, 0.640000, 0.653846, 0.666667, 0.678571, 0.689655, 0.700000, 0.709677, 0.718750, 0.727273, 0.735294, 0.742857, 0.750000, 0.756757, 0.763158, 0.769231, 0.775000, 0.780488, 0.785714, 0.790698, 0.795455, 0.800000, 0.804348, 0.808511, 0.812500, 0.816327, 0.820000, 0.823529, 0.826923, 0.830189, 0.833333, 0.836364, 0.839286, 0.842105, 0.844828, 0.847458, 0.850000, 0.852459, 0.854839, 0.857143, 0.859375, 0.861538, 0.863636, 0.865672, 0.867647, 0.869565, 6.100000, 5.636364, 5.250000, 4.923077, 4.642857, 4.400000, 4.187500, 4.000000, 3.833333, 3.684211, 3.550000, 3.428571, 3.318182, 3.217391, 3.125000, 3.040000, 2.961539, 2.888889, 2.821429, 2.758621, 2.700000, 2.645161, 2.593750, 2.545455, 2.500000, 2.457143, 2.416667, 2.378378, 2.342105, 2.307692, 2.275000, 2.243902, 2.214286, 2.186047, 2.159091, 2.133333, 2.108696, 2.085106, 2.062500, 2.040816, 2.020000, 2.000000, 1.980769, 1.962264, 1.944444, 1.927273, 1.910714, 1.894737, 1.879310, 1.864407, 1.850000, 1.836066, 1.822581, 1.809524, 1.796875, 1.784615, 1.772727, 1.761194, 1.750000, 1.739130, 12.100000, 11.090909, 10.250000, 9.538462, 8.928572, 8.400000, 7.937500, 7.529412, 7.166667, 6.842105, 6.550000, 6.285714, 6.045455, 5.826087, 5.625000, 5.440000, 5.269231, 5.111111, 4.964286, 4.827586, 4.700000, 4.580645, 4.468750, 4.363636, 4.264706, 4.171429, 4.083333, 4.000000, 3.921053, 3.846154, 3.775000, 3.707317, 3.642857, 3.581395, 3.522727, 3.466667, 3.413043, 3.361702, 
3.312500, 3.265306, 3.220000, 3.176471, 3.134615, 3.094340, 3.055556, 3.018182, 2.982143, 2.947368, 2.913793, 2.881356, 2.850000, 2.819672, 2.790323, 2.761905, 2.734375, 2.707692, 2.681818, 2.656716, 2.632353, 2.608696, 2.585714, 2.563380, 2.541667, 2.520548, 2.500000, 2.480000, 2.460526, 2.441558, 2.423077, 2.405063, 2.387500, 2.370370, 2.353658, 2.337349, 2.321429, 2.305882, 2.290698, 2.275862, 2.261364, 2.247191, 2.233333, 2.219780, 2.206522, 2.193548, 2.180851, 2.168421, 2.156250, 2.144330, 2.132653, 2.121212, 2.110000, 2.099010, 2.088235, 2.077670, 2.067308, 2.057143, 2.047170, 2.037383, 2.027778, 2.018349, 2.009091, 2.000000, 1.991071, 1.982301, 1.973684, 1.965217, 1.956897, 1.948718, 1.940678, 1.932773, 1.925000, 1.917355, 1.909836, 1.902439, 1.895161, 1.888000, 1.880952, 1.874016, 1.867188, 1.860465, 3.442857, 3.408451, 3.375000, 3.342466, 3.310811, 3.280000, 3.250000, 3.220779, 3.192308, 3.164557, 3.137500, 3.111111, 3.085366, 3.060241, 3.035714, 3.011765, 2.988372, 2.965517, 2.943182, 2.921348, 2.900000, 2.879121, 2.858696, 2.838710, 2.819149, 2.800000, 2.781250, 2.762887, 2.744898, 2.727273, 2.710000, 2.693069, 2.676471, 2.660194, 2.644231, 2.628572, 2.613208, 2.598131, 2.583333, 2.568807, 2.554545, 2.540540, 2.526786, 2.513274, 2.500000, 2.486957, 2.474138, 2.461539, 2.449152, 2.436975, 2.425000, 2.413223, 2.401639, 2.390244, 2.379032, 2.368000, 2.357143, 2.346457, 2.335938, 2.325581, 4.300000, 4.253521, 4.208333, 4.164383, 4.121622, 4.080000, 4.039474, 4.000000, 3.961539, 3.924051, 3.887500, 3.851852, 3.817073, 3.783133, 3.750000, 3.717647, 3.686047, 3.655172, 3.625000, 3.595506, 3.566667, 3.538461, 3.510870, 3.483871, 3.457447, 3.431579, 3.406250, 3.381443, 3.357143, 3.333333, 3.310000, 3.287129, 3.264706, 3.242718, 3.221154, 3.200000, 3.179245, 3.158879, 3.138889, 3.119266, 3.100000, 3.081081, 3.062500, 3.044248, 3.026316, 3.008696, 2.991379, 2.974359, 2.957627, 2.941176, 2.925000, 2.909091, 2.893443, 2.878049, 2.862903, 2.848000, 2.833333, 2.818898, 
2.804688, 2.790698 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Divide, { 0,2,3,4 }, y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_5D_3) { + + auto x = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 5 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5, 4, 3 }, { 0.100000, 0.200000, 0.300000, 0.400000, 0.500000, 0.600000, 0.700000, 0.800000, 0.900000, 1.000000, 1.100000, 1.200000, 1.181818, 1.272727, 1.363636, 1.454545, 1.545455, 1.636364, 1.727273, 1.818182, 1.909091, 2.000000, 2.090909, 2.181818, 2.083333, 2.166667, 2.250000, 2.333333, 2.416667, 2.500000, 2.583333, 2.666667, 2.750000, 2.833333, 2.916667, 3.000000, 2.846154, 2.923077, 3.000000, 3.076923, 3.153846, 3.230769, 3.307692, 3.384615, 3.461539, 3.538461, 3.615385, 3.692308, 3.500000, 3.571429, 3.642857, 3.714286, 3.785714, 3.857143, 3.928571, 4.000000, 4.071429, 4.142857, 4.214286, 4.285714, 6.100000, 6.200000, 6.300000, 6.400000, 6.500000, 6.600000, 6.700000, 6.800000, 6.900000, 7.000000, 7.100000, 7.200000, 6.636364, 6.727273, 6.818182, 6.909091, 7.000000, 7.090909, 7.181818, 7.272727, 7.363636, 7.454545, 7.545455, 7.636364, 7.083333, 7.166667, 7.250000, 7.333333, 7.416667, 7.500000, 7.583333, 7.666667, 7.750000, 7.833333, 7.916667, 8.000000, 7.461538, 7.538462, 7.615385, 7.692307, 7.769231, 7.846154, 7.923077, 8.000000, 8.076923, 8.153846, 8.230769, 8.307693, 7.785714, 7.857143, 7.928571, 8.000000, 8.071428, 8.142858, 8.214286, 8.285714, 8.357142, 8.428572, 8.500000, 8.571428, 12.100000, 12.200000, 12.300000, 12.400000, 12.500000, 12.600000, 12.700000, 12.800000, 12.900000, 13.000000, 
13.100000, 13.200000, 12.090909, 12.181818, 12.272727, 12.363636, 12.454545, 12.545455, 12.636364, 12.727273, 12.818182, 12.909091, 13.000000, 13.090909, 12.083333, 12.166667, 12.250000, 12.333333, 12.416667, 12.500000, 12.583333, 12.666667, 12.750000, 12.833333, 12.916667, 13.000000, 12.076923, 12.153846, 12.230769, 12.307693, 12.384615, 12.461538, 12.538462, 12.615385, 12.692307, 12.769231, 12.846154, 12.923077, 12.071428, 12.142858, 12.214286, 12.285714, 12.357142, 12.428572, 12.500000, 12.571428, 12.642858, 12.714286, 12.785714, 12.857142, 12.066667, 12.133333, 12.200000, 12.266666, 12.333333, 12.400000, 12.466666, 12.533334, 12.600000, 12.666667, 12.733334, 12.800000, 12.062500, 12.125000, 12.187500, 12.250000, 12.312500, 12.375000, 12.437500, 12.500000, 12.562500, 12.625000, 12.687500, 12.750000, 12.058824, 12.117647, 12.176471, 12.235294, 12.294118, 12.352942, 12.411765, 12.470589, 12.529411, 12.588235, 12.647058, 12.705882, 12.055555, 12.111111, 12.166667, 12.222222, 12.277778, 12.333333, 12.388889, 12.444445, 12.500000, 12.555555, 12.611111, 12.666667, 12.052631, 12.105263, 12.157895, 12.210526, 12.263158, 12.315789, 12.368421, 12.421053, 12.473684, 12.526316, 12.578947, 12.631579, 16.066668, 16.133333, 16.200001, 16.266666, 16.333334, 16.400000, 16.466667, 16.533333, 16.600000, 16.666666, 16.733334, 16.799999, 15.812500, 15.875000, 15.937500, 16.000000, 16.062500, 16.125000, 16.187500, 16.250000, 16.312500, 16.375000, 16.437500, 16.500000, 15.588235, 15.647058, 15.705882, 15.764706, 15.823529, 15.882353, 15.941176, 16.000000, 16.058823, 16.117647, 16.176470, 16.235294, 15.388889, 15.444445, 15.500000, 15.555555, 15.611111, 15.666667, 15.722222, 15.777778, 15.833333, 15.888889, 15.944445, 16.000000, 15.210526, 15.263158, 15.315789, 15.368421, 15.421053, 15.473684, 15.526316, 15.578947, 15.631579, 15.684211, 15.736842, 15.789474, 20.066668, 20.133333, 20.200001, 20.266666, 20.333334, 20.400000, 20.466667, 20.533333, 20.600000, 20.666666, 20.733334, 
20.799999, 19.562500, 19.625000, 19.687500, 19.750000, 19.812500, 19.875000, 19.937500, 20.000000, 20.062500, 20.125000, 20.187500, 20.250000, 19.117647, 19.176470, 19.235294, 19.294117, 19.352942, 19.411764, 19.470589, 19.529411, 19.588236, 19.647058, 19.705883, 19.764706, 18.722221, 18.777779, 18.833334, 18.888889, 18.944445, 19.000000, 19.055555, 19.111111, 19.166666, 19.222221, 19.277779, 19.333334, 18.368422, 18.421053, 18.473684, 18.526316, 18.578947, 18.631578, 18.684210, 18.736841, 18.789474, 18.842106, 18.894737, 18.947369 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyBroadcast(nd4j::broadcast::Divide, { 0,2 }, y, z); + + ASSERT_EQ(e, z); +} +/////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests14, Test_broadcast_5D_4) { + + auto x = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + auto y = NDArray('f', { 2, 1, 5, 1, 1 }, nd4j::DataType::FLOAT32); + auto z = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + // recieved by main algorithm + auto eC = NDArray('c', { 2, 3, 5, 4, 3 }, { 0.100000, 0.200000, 0.300000, 0.400000, 0.500000, 0.600000, 0.700000, 0.800000, 0.900000, 1.000000, 1.100000, 1.200000, 1.181818, 1.272727, 1.363636, 1.454545, 1.545455, 1.636364, 1.727273, 1.818182, 1.909091, 2.000000, 2.090909, 2.181818, 2.083333, 2.166667, 2.250000, 2.333333, 2.416667, 2.500000, 2.583333, 2.666667, 2.750000, 2.833333, 2.916667, 3.000000, 2.846154, 2.923077, 3.000000, 3.076923, 3.153846, 3.230769, 3.307692, 3.384615, 3.461539, 3.538461, 3.615385, 3.692308, 3.500000, 3.571429, 3.642857, 3.714286, 3.785714, 3.857143, 3.928571, 4.000000, 4.071429, 4.142857, 4.214286, 4.285714, 6.100000, 6.200000, 6.300000, 6.400000, 6.500000, 6.600000, 6.700000, 6.800000, 6.900000, 7.000000, 7.100000, 7.200000, 6.636364, 6.727273, 6.818182, 6.909091, 7.000000, 7.090909, 
7.181818, 7.272727, 7.363636, 7.454545, 7.545455, 7.636364, 7.083333, 7.166667, 7.250000, 7.333333, 7.416667, 7.500000, 7.583333, 7.666667, 7.750000, 7.833333, 7.916667, 8.000000, 7.461538, 7.538462, 7.615385, 7.692307, 7.769231, 7.846154, 7.923077, 8.000000, 8.076923, 8.153846, 8.230769, 8.307693, 7.785714, 7.857143, 7.928571, 8.000000, 8.071428, 8.142858, 8.214286, 8.285714, 8.357142, 8.428572, 8.500000, 8.571428, 12.100000, 12.200000, 12.300000, 12.400000, 12.500000, 12.600000, 12.700000, 12.800000, 12.900000, 13.000000, 13.100000, 13.200000, 12.090909, 12.181818, 12.272727, 12.363636, 12.454545, 12.545455, 12.636364, 12.727273, 12.818182, 12.909091, 13.000000, 13.090909, 12.083333, 12.166667, 12.250000, 12.333333, 12.416667, 12.500000, 12.583333, 12.666667, 12.750000, 12.833333, 12.916667, 13.000000, 12.076923, 12.153846, 12.230769, 12.307693, 12.384615, 12.461538, 12.538462, 12.615385, 12.692307, 12.769231, 12.846154, 12.923077, 12.071428, 12.142858, 12.214286, 12.285714, 12.357142, 12.428572, 12.500000, 12.571428, 12.642858, 12.714286, 12.785714, 12.857142, 12.066667, 12.133333, 12.200000, 12.266666, 12.333333, 12.400000, 12.466666, 12.533334, 12.600000, 12.666667, 12.733334, 12.800000, 12.062500, 12.125000, 12.187500, 12.250000, 12.312500, 12.375000, 12.437500, 12.500000, 12.562500, 12.625000, 12.687500, 12.750000, 12.058824, 12.117647, 12.176471, 12.235294, 12.294118, 12.352942, 12.411765, 12.470589, 12.529411, 12.588235, 12.647058, 12.705882, 12.055555, 12.111111, 12.166667, 12.222222, 12.277778, 12.333333, 12.388889, 12.444445, 12.500000, 12.555555, 12.611111, 12.666667, 12.052631, 12.105263, 12.157895, 12.210526, 12.263158, 12.315789, 12.368421, 12.421053, 12.473684, 12.526316, 12.578947, 12.631579, 16.066668, 16.133333, 16.200001, 16.266666, 16.333334, 16.400000, 16.466667, 16.533333, 16.600000, 16.666666, 16.733334, 16.799999, 15.812500, 15.875000, 15.937500, 16.000000, 16.062500, 16.125000, 16.187500, 16.250000, 16.312500, 16.375000, 16.437500, 
16.500000, 15.588235, 15.647058, 15.705882, 15.764706, 15.823529, 15.882353, 15.941176, 16.000000, 16.058823, 16.117647, 16.176470, 16.235294, 15.388889, 15.444445, 15.500000, 15.555555, 15.611111, 15.666667, 15.722222, 15.777778, 15.833333, 15.888889, 15.944445, 16.000000, 15.210526, 15.263158, 15.315789, 15.368421, 15.421053, 15.473684, 15.526316, 15.578947, 15.631579, 15.684211, 15.736842, 15.789474, 20.066668, 20.133333, 20.200001, 20.266666, 20.333334, 20.400000, 20.466667, 20.533333, 20.600000, 20.666666, 20.733334, 20.799999, 19.562500, 19.625000, 19.687500, 19.750000, 19.812500, 19.875000, 19.937500, 20.000000, 20.062500, 20.125000, 20.187500, 20.250000, 19.117647, 19.176470, 19.235294, 19.294117, 19.352942, 19.411764, 19.470589, 19.529411, 19.588236, 19.647058, 19.705883, 19.764706, 18.722221, 18.777779, 18.833334, 18.888889, 18.944445, 19.000000, 19.055555, 19.111111, 19.166666, 19.222221, 19.277779, 19.333334, 18.368422, 18.421053, 18.473684, 18.526316, 18.578947, 18.631578, 18.684210, 18.736841, 18.789474, 18.842106, 18.894737, 18.947369 }, nd4j::DataType::FLOAT32); + + auto e = NDArray('f', { 2, 3, 5, 4, 3 }, nd4j::DataType::FLOAT32); + e.assign(eC); + + x.linspace(1.f); + y.linspace(10.f); + z.assign(0.f); + + x.applyTrueBroadcast(BroadcastOpsTuple::Divide(), y, z); + + ASSERT_EQ(e, z); +} // @Test // public void testMmulRank4_simple(){ @@ -1489,3 +1680,5 @@ TEST_F(DeclarableOpsTests14, matmul_test37) { // INDArray exp = Nd4j.valueArrayOf(shape, 64.0, DataType.FLOAT); //Each entry in output is sum of 64 (1.0 x 1.0) multiplications // assertEquals(exp, out); // } + +