diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 949dbd542..a16c9eaf7 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -22,8 +22,8 @@ if (APPLE) elseif(WIN32) set(X86_BUILD true) if (NOT CUDA_BLAS) - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -fmax-errors=2 -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG " -g -fPIC -std=c++11 -fmax-errors=2") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -fmax-errors=2 -DINLINE_LOOPS -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " -g -fPIC -std=c++11 -DINLINE_LOOPS -fmax-errors=2") else() set(CMAKE_CXX_FLAGS_RELEASE "-D_RELEASE=true /wd4804") set(CMAKE_CXX_FLAGS_DEBUG " /FS /EHsc /wd4661 /wd4804 /wd4267 /wd4244 /wd4251 /wd4305") diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp index 895afccfd..16bf3b08b 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp @@ -29,23 +29,31 @@ namespace nd4j { template template void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams) { - Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#endif } template template void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams) { - Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#endif } template void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams), REDUCE3_OPS); +#endif } template void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams), REDUCE3_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_0); diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp index d8c24e096..4e350ce15 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp @@ -29,23 +29,31 @@ namespace nd4j { template template void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams) { - Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#endif } template template void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams) { - Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#endif } template void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams), REDUCE3_OPS); +#endif } template void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams), REDUCE3_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_1); diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp index 4ecc0e370..e869793a8 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp @@ -29,23 +29,31 @@ namespace nd4j { template template void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams) { - Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#endif } template template void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams) { - Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#endif } template void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams), REDUCE3_OPS); +#endif } template void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams), REDUCE3_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_2); diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp index 218c335ca..474443fd3 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp @@ -29,23 +29,31 @@ namespace nd4j { template template void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams) { - Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams); +#endif } template template void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams) { - Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#ifndef INLINE_LOOPS + Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams); +#endif } template void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams), REDUCE3_OPS); +#endif } template void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams), REDUCE3_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduction3Loops, , LIBND4J_TYPES, FLOAT_TYPES_3); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops.hpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops.hpp index 4a223a0f2..0709e5f3c 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops.hpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops.hpp @@ -19,3 +19,4 @@ // #include +#include diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp index 35ae99afb..3d7a85eff 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp @@ -27,7 +27,9 @@ namespace nd4j { template template void ReductionBoolLoops::innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template @@ -35,7 +37,9 @@ namespace nd4j { Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, X *extraParams) { +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams), REDUCE_BOOL_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionBoolLoops, , LIBND4J_TYPES, BOOL_TYPES); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp index c7b1f6ff8..f545c8c83 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp @@ -29,15 +29,18 @@ namespace nd4j { template template void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Y *extraParams) { - +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_0); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp index 76c1141bf..fa52015ca 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp @@ -29,15 +29,18 @@ namespace nd4j { template template void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Y *extraParams) { - +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_1); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp index 7288816ad..eb144fcc6 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp @@ -29,15 +29,18 @@ namespace nd4j { template template void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Y *extraParams) { - +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_2); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp index 251624076..d2991b51b 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp @@ -29,15 +29,18 @@ namespace nd4j { template template void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Y *extraParams) { - +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_3); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp index a6dd992c6..04a3d8559 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp @@ -34,15 +34,18 @@ namespace nd4j { template template void ReductionLongLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z *z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template void ReductionLongLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, X *extraParams) { - +#ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_LONG_OPS); +#endif } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionLongLoops, , LIBND4J_TYPES, LONG_TYPES); diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp index 623d97e79..9932b04c5 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp @@ -27,7 +27,9 @@ namespace nd4j { template template void ReductionSameLoops::innerloopReduce(X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams) { +#ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); +#endif } template @@ -35,11 +37,13 @@ namespace nd4j { Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, X *vextraParams) { +#ifndef INLINE_LOOPS auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); DISPATCH_BY_OPNUM_T(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams), REDUCE_SAME_OPS); +#endif } BUILD_SINGLE_TEMPLATE(template class ReductionSameLoops, , LIBND4J_TYPES);