diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 50c6b9b8a..d8b0439b4 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -25,8 +25,8 @@ elseif (APPLE) elseif(WIN32) set(X86_BUILD true) if (CUDA_BLAS) - set(CMAKE_CXX_FLAGS_RELEASE "-D_RELEASE=true /wd4804") - set(CMAKE_CXX_FLAGS_DEBUG " /FS /EHsc /wd4661 /wd4804 /wd4267 /wd4244 /wd4251 /wd4305") + set(CMAKE_CXX_FLAGS_RELEASE "-D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " /FS /EHsc") else() set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -fmax-errors=2 -D_RELEASE=true") set(CMAKE_CXX_FLAGS_DEBUG " -g -O2 -fPIC -std=c++11 -fmax-errors=2") diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 9674e28cd..c86bdc13a 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -111,7 +111,7 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") # using Visual Studio C++ - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc ${ARCH_TUNE}") + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}") @@ -158,7 +158,7 @@ if(CUDA_BLAS) include_directories(${CUDA_INCLUDE_DIRS}) message("CUDA found!") set( CUDA_ARCHITECTURE_MINIMUM "3.0" CACHE STRING "Minimum required CUDA compute capability" ) - SET(CUDA_VERBOSE_BUILD ON) + SET(CUDA_VERBOSE_BUILD OFF) SET(CUDA_SEPARABLE_COMPILATION OFF) #set(CUDA_COMPUTE_CAPABILITY "61") set(CUDA_COMPUTE_CAPABILITY "35") @@ -264,24 +264,13 @@ if(CUDA_BLAS) file(GLOB_RECURSE LOOPS_SOURCES false ../include/loops/impl/*.cpp ../include/loops/*.h) file(GLOB_RECURSE LOOPS_SOURCES_CUDA false ../include/loops/*.cu) - if (NOT BUILD_TESTS) - CUDA_ADD_LIBRARY(${LIBND4J_NAME} SHARED cuda/NativeOps.cu cuda/NativeOpExecutioner.cu cuda/BlasVersionHelper.cu Environment.cpp ${LOOPS_SOURCES_CUDA} + + CUDA_ADD_LIBRARY(${LIBND4J_NAME} SHARED cuda/NativeOps.cu cuda/NativeOpExecutioner.cu cuda/BlasVersionHelper.cu Environment.cpp ${LOOPS_SOURCES_CUDA} ${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} ../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES} ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES}) - else() - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_TESTS=true") - - CUDA_ADD_LIBRARY(${LIBND4J_NAME} STATIC cuda/NativeOps.cu cuda/NativeOpExecutioner.cu cuda/BlasVersionHelper.cu Environment.cpp ${LOOPS_SOURCES_CUDA} - ${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} - ../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h - cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp - Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES} - ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES}) - endif() - if(WIN32) message("CUDA on Windows: enabling /EHsc") @@ -289,11 +278,16 @@ if(CUDA_BLAS) SET_TARGET_PROPERTIES(${LIBND4J_NAME} PROPERTIES COMPILER_FLAGS "/EHsc /bigobj /std:c++14") endif() - target_link_libraries(${LIBND4J_NAME} ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusolver_LIBRARY}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/cuda) install(TARGETS ${LIBND4J_NAME} DESTINATION .) + + add_custom_command( + TARGET ${LIBND4J_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + ${PROJECT_BINARY_DIR}/../../tests_cpu/) endif(CUDA_FOUND) elseif(CPU_BLAS) diff --git a/libnd4j/blas/NDArray.hpp b/libnd4j/blas/NDArray.hpp index 00a984d45..df358b64f 100644 --- a/libnd4j/blas/NDArray.hpp +++ b/libnd4j/blas/NDArray.hpp @@ -31,9 +31,9 @@ namespace nd4j { template <> -utf8string NDArray::e(const Nd4jLong i) const; +ND4J_EXPORT utf8string NDArray::e(const Nd4jLong i) const; template <> -std::string NDArray::e(const Nd4jLong i) const; +ND4J_EXPORT std::string NDArray::e(const Nd4jLong i) const; ////////////////////////////////////////////////////////////////////////// template @@ -48,7 +48,7 @@ NDArray* NDArray::asT() const{ return result; } -BUILD_SINGLE_TEMPLATE(template NDArray* NDArray::asT, () const, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT NDArray* NDArray::asT, () const, LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// // copy constructor @@ -435,7 +435,7 @@ std::vector NDArray::getBufferAsVector() { vector[e] = this->e(e); return vector; } -BUILD_SINGLE_TEMPLATE(template std::vector, NDArray::getBufferAsVector(), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT std::vector, NDArray::getBufferAsVector(), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// std::vector NDArray::getShapeAsFlatVector() { @@ -813,7 +813,7 @@ void NDArray::templatedSet(void *buffer, const Nd4jLong *indices, const void *va auto xOffset = shape::getOffset(getShapeInfo(), indices); t[xOffset] = static_cast(y); } -BUILD_DOUBLE_TEMPLATE(template void NDArray::templatedSet, (void *buffer, const Nd4jLong *indices, const void *value), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedSet, (void *buffer, const Nd4jLong *indices, const void *value), LIBND4J_TYPES, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// template @@ -823,7 +823,7 @@ void NDArray::templatedSet(void *buffer, const Nd4jLong offset, const void *valu t[offset] = static_cast(y); } -BUILD_DOUBLE_TEMPLATE(template void NDArray::templatedSet, (void *buffer, const Nd4jLong offset, const void *value), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedSet, (void *buffer, const Nd4jLong offset, const void *value), LIBND4J_TYPES, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// void NDArray::setContext(nd4j::LaunchContext *context) { @@ -1301,7 +1301,7 @@ template void* NDArray::templatedPointerShift(const Nd4jLong offset) const { return reinterpret_cast(getBuffer()) + offset; } -BUILD_SINGLE_TEMPLATE(template void* NDArray::templatedPointerShift, (const Nd4jLong offset) const, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void* NDArray::templatedPointerShift, (const Nd4jLong offset) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // method makes copy of this array and applies to the copy transpose operation, this array remains unaffected @@ -1608,7 +1608,7 @@ bool NDArray::isUnitary() { ////////////////////////////////////////////////////////////////////////// template <> -std::string* NDArray::bufferAsT() const { +std::string* ND4J_EXPORT NDArray::bufferAsT() const { throw std::runtime_error("This method is NOT supposed to be used"); } @@ -1620,7 +1620,7 @@ T* NDArray::bufferAsT() const { return reinterpret_cast(getBuffer()); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template, * NDArray::bufferAsT() const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , * NDArray::bufferAsT() const, LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// NDArray* NDArray::subarray(IndicesList& idx) const { @@ -1797,16 +1797,16 @@ NDArray NDArray::operator+(const T& scalar) const { return result; } -template NDArray NDArray::operator+(const double& scalar) const; -template NDArray NDArray::operator+(const float& scalar) const; -template NDArray NDArray::operator+(const float16& scalar) const; -template NDArray NDArray::operator+(const bfloat16& scalar) const; -template NDArray NDArray::operator+(const Nd4jLong& scalar) const; -template NDArray NDArray::operator+(const int& scalar) const; -template NDArray NDArray::operator+(const int16_t& scalar) const; -template NDArray NDArray::operator+(const int8_t& scalar) const; -template NDArray NDArray::operator+(const uint8_t& scalar) const; -template NDArray NDArray::operator+(const bool& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const double& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const float& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const float16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const bfloat16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const Nd4jLong& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const int& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const int16_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const int8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const uint8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator+(const bool& scalar) const; //////////////////////////////////////////////////////////////////////// // subtraction operator array - scalar @@ -1824,16 +1824,16 @@ NDArray NDArray::operator-(const T& scalar) const { return result; } -template NDArray NDArray::operator-(const double& scalar) const; -template NDArray NDArray::operator-(const float& scalar) const; -template NDArray NDArray::operator-(const float16& scalar) const; -template NDArray NDArray::operator-(const bfloat16& scalar) const; -template NDArray NDArray::operator-(const Nd4jLong& scalar) const; -template NDArray NDArray::operator-(const int& scalar) const; -template NDArray NDArray::operator-(const int16_t& scalar) const; -template NDArray NDArray::operator-(const int8_t& scalar) const; -template NDArray NDArray::operator-(const uint8_t& scalar) const; -template NDArray NDArray::operator-(const bool& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const double& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const float& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const float16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const bfloat16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const Nd4jLong& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const int& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const int16_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const int8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const uint8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator-(const bool& scalar) const; //////////////////////////////////////////////////////////////////////// // multiplication operator array*scalar @@ -1851,16 +1851,16 @@ NDArray NDArray::operator*(const T& scalar) const { return result; } -template NDArray NDArray::operator*(const double& scalar) const; -template NDArray NDArray::operator*(const float& scalar) const; -template NDArray NDArray::operator*(const float16& scalar) const; -template NDArray NDArray::operator*(const bfloat16& scalar) const; -template NDArray NDArray::operator*(const Nd4jLong& scalar) const; -template NDArray NDArray::operator*(const int& scalar) const; -template NDArray NDArray::operator*(const int16_t& scalar) const; -template NDArray NDArray::operator*(const int8_t& scalar) const; -template NDArray NDArray::operator*(const uint8_t& scalar) const; -template NDArray NDArray::operator*(const bool& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const double& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const float& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const float16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const bfloat16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const Nd4jLong& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const int& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const int16_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const int8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const uint8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator*(const bool& scalar) const; //////////////////////////////////////////////////////////////////////// // division operator array / scalar @@ -1881,16 +1881,16 @@ NDArray NDArray::operator/(const T& scalar) const { return result; } -template NDArray NDArray::operator/(const double& scalar) const; -template NDArray NDArray::operator/(const float& scalar) const; -template NDArray NDArray::operator/(const float16& scalar) const; -template NDArray NDArray::operator/(const bfloat16& scalar) const; -template NDArray NDArray::operator/(const Nd4jLong& scalar) const; -template NDArray NDArray::operator/(const int& scalar) const; -template NDArray NDArray::operator/(const int16_t& scalar) const; -template NDArray NDArray::operator/(const int8_t& scalar) const; -template NDArray NDArray::operator/(const uint8_t& scalar) const; -template NDArray NDArray::operator/(const bool& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const double& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const float& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const float16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const bfloat16& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const Nd4jLong& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const int& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const int16_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const int8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const uint8_t& scalar) const; +template ND4J_EXPORT NDArray NDArray::operator/(const bool& scalar) const; //////////////////////////////////////////////////////////////////////// // addition operator scalar + array @@ -2260,13 +2260,13 @@ void NDArray::operator+=(const T value) { NDArray::registerSpecialUse({this}, {}); } -template void NDArray::operator+=(const double value); -template void NDArray::operator+=(const float value); -template void NDArray::operator+=(const float16 value); -template void NDArray::operator+=(const bfloat16 value); -template void NDArray::operator+=(const Nd4jLong value); -template void NDArray::operator+=(const int value); -template void NDArray::operator+=(const bool value); +template ND4J_EXPORT void NDArray::operator+=(const double value); +template ND4J_EXPORT void NDArray::operator+=(const float value); +template ND4J_EXPORT void NDArray::operator+=(const float16 value); +template ND4J_EXPORT void NDArray::operator+=(const bfloat16 value); +template ND4J_EXPORT void NDArray::operator+=(const Nd4jLong value); +template ND4J_EXPORT void NDArray::operator+=(const int value); +template ND4J_EXPORT void NDArray::operator+=(const bool value); //////////////////////////////////////////////////////////////////////// template @@ -2282,13 +2282,13 @@ void NDArray::operator-=(const T value) { NDArray::registerSpecialUse({this}, {}); } -template void NDArray::operator-=(const double value); -template void NDArray::operator-=(const float value); -template void NDArray::operator-=(const float16 value); -template void NDArray::operator-=(const bfloat16 value); -template void NDArray::operator-=(const Nd4jLong value); -template void NDArray::operator-=(const int value); -template void NDArray::operator-=(const bool value); +template ND4J_EXPORT void NDArray::operator-=(const double value); +template ND4J_EXPORT void NDArray::operator-=(const float value); +template ND4J_EXPORT void NDArray::operator-=(const float16 value); +template ND4J_EXPORT void NDArray::operator-=(const bfloat16 value); +template ND4J_EXPORT void NDArray::operator-=(const Nd4jLong value); +template ND4J_EXPORT void NDArray::operator-=(const int value); +template ND4J_EXPORT void NDArray::operator-=(const bool value); //////////////////////////////////////////////////////////////////////// template @@ -2302,16 +2302,16 @@ void NDArray::operator*=(const T scalar) { NDArray::registerSpecialUse({this}, {}); } -template void NDArray::operator*=(const double scalar); -template void NDArray::operator*=(const float scalar); -template void NDArray::operator*=(const float16 scalar); -template void NDArray::operator*=(const bfloat16 scalar); -template void NDArray::operator*=(const Nd4jLong scalar); -template void NDArray::operator*=(const int scalar); -template void NDArray::operator*=(const int16_t scalar); -template void NDArray::operator*=(const int8_t scalar); -template void NDArray::operator*=(const uint8_t scalar); -template void NDArray::operator*=(const bool scalar); +template ND4J_EXPORT void NDArray::operator*=(const double scalar); +template ND4J_EXPORT void NDArray::operator*=(const float scalar); +template ND4J_EXPORT void NDArray::operator*=(const float16 scalar); +template ND4J_EXPORT void NDArray::operator*=(const bfloat16 scalar); +template ND4J_EXPORT void NDArray::operator*=(const Nd4jLong scalar); +template ND4J_EXPORT void NDArray::operator*=(const int scalar); +template ND4J_EXPORT void NDArray::operator*=(const int16_t scalar); +template ND4J_EXPORT void NDArray::operator*=(const int8_t scalar); +template ND4J_EXPORT void NDArray::operator*=(const uint8_t scalar); +template ND4J_EXPORT void NDArray::operator*=(const bool scalar); //////////////////////////////////////////////////////////////////////// template @@ -2324,16 +2324,16 @@ void NDArray::operator/=(const T scalar) { NativeOpExecutioner::execScalar(getContext(), nd4j::scalar::Divide, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } -template void NDArray::operator/=(const double scalar); -template void NDArray::operator/=(const float scalar); -template void NDArray::operator/=(const float16 scalar); -template void NDArray::operator/=(const bfloat16 scalar); -template void NDArray::operator/=(const Nd4jLong scalar); -template void NDArray::operator/=(const int scalar); -template void NDArray::operator/=(const int16_t scalar); -template void NDArray::operator/=(const int8_t scalar); -template void NDArray::operator/=(const uint8_t scalar); -template void NDArray::operator/=(const bool scalar); +template ND4J_EXPORT void NDArray::operator/=(const double scalar); +template ND4J_EXPORT void NDArray::operator/=(const float scalar); +template ND4J_EXPORT void NDArray::operator/=(const float16 scalar); +template ND4J_EXPORT void NDArray::operator/=(const bfloat16 scalar); +template ND4J_EXPORT void NDArray::operator/=(const Nd4jLong scalar); +template ND4J_EXPORT void NDArray::operator/=(const int scalar); +template ND4J_EXPORT void NDArray::operator/=(const int16_t scalar); +template ND4J_EXPORT void NDArray::operator/=(const int8_t scalar); +template ND4J_EXPORT void NDArray::operator/=(const uint8_t scalar); +template ND4J_EXPORT void NDArray::operator/=(const bool scalar); //////////////////////////////////////////////////////////////////////// // subtraction operator array - array @@ -2929,7 +2929,7 @@ std::vector NDArray::asVectorT() { return result; } -BUILD_SINGLE_TEMPLATE(template std::vector, NDArray::asVectorT(), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT std::vector, NDArray::asVectorT(), LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // set new order and shape in case of suitable array length @@ -3046,7 +3046,7 @@ template void NDArray::templatedSet(void *buffer, const Nd4jLong xOfsset, nd4j::DataType dtype, const void *value) { BUILD_SINGLE_PARTIAL_SELECTOR(dtype, templatedSet< , T>(buffer, xOfsset, value), LIBND4J_TYPES); } -BUILD_SINGLE_TEMPLATE(template void NDArray::templatedSet, (void *buffer, const Nd4jLong xOfsset, nd4j::DataType dtype, const void *value), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedSet, (void *buffer, const Nd4jLong xOfsset, nd4j::DataType dtype, const void *value), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::applyPairwiseTransform(nd4j::pairwise::Ops op, const NDArray* other, NDArray *target, ExtraArguments *extraParams) const{ @@ -3109,7 +3109,7 @@ void NDArray::templatedDoubleAssign(void *xBuffer, const Nd4jLong xOffset, const const auto y = reinterpret_cast(yBuffer); x[xOffset] = static_cast(y[yOffset]); } -BUILD_DOUBLE_TEMPLATE(template void NDArray::templatedDoubleAssign, (void *xBuffer, const Nd4jLong xOffset, const void *yBuffer, const Nd4jLong yOffset) const, LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedDoubleAssign, (void *xBuffer, const Nd4jLong xOffset, const void *yBuffer, const Nd4jLong yOffset) const, LIBND4J_TYPES, LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::varianceAlongDimension(nd4j::variance::Ops op, NDArray *target, const bool biasCorrected, const std::vector& dimensions) const { @@ -3356,7 +3356,7 @@ T NDArray::e(const Nd4jLong i) const { BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), rp), LIBND4J_TYPES); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template , NDArray::e(const Nd4jLong) const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , NDArray::e(const Nd4jLong) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // Returns value from 2D matrix by coordinates/indexes @@ -3376,7 +3376,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j) const { return static_cast(119); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template , NDArray::e(const Nd4jLong, const Nd4jLong) const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , NDArray::e(const Nd4jLong, const Nd4jLong) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // returns value from 3D tensor by coordinates @@ -3396,7 +3396,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { return static_cast(119); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template , NDArray::e(const Nd4jLong, const Nd4jLong, const Nd4jLong) const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , NDArray::e(const Nd4jLong, const Nd4jLong, const Nd4jLong) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // returns value from 3D tensor by coordinates @@ -3416,7 +3416,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLon return static_cast(119); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template , NDArray::e(const Nd4jLong, const Nd4jLong, const Nd4jLong, const Nd4jLong) const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , NDArray::e(const Nd4jLong, const Nd4jLong, const Nd4jLong, const Nd4jLong) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// NDArray NDArray::e(const Nd4jLong i) const { @@ -3591,17 +3591,17 @@ void NDArray::applyScalar(nd4j::scalar::Ops op, const T scalar, NDArray *target, applyScalarArr(op, &scalarArr, target, extraParams); } -template <> void NDArray::applyScalar(nd4j::scalar::Ops op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) { throw std::runtime_error("NDArray::applyScalar method: do not use me!");} -template void NDArray::applyScalar(nd4j::scalar::Ops op, const double scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const float scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const float16 scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const bfloat16 scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const Nd4jLong scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const int scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const int16_t scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const int8_t scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const uint8_t scalar, NDArray *target, ExtraArguments *extraParams); -template void NDArray::applyScalar(nd4j::scalar::Ops op, const bool scalar, NDArray *target, ExtraArguments *extraParams); +template <> ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) { throw std::runtime_error("NDArray::applyScalar method: do not use me!");} +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const double scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const float scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const float16 scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const bfloat16 scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const Nd4jLong scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const int scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const int16_t scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const int8_t scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const uint8_t scalar, NDArray *target, ExtraArguments *extraParams); +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::Ops op, const bool scalar, NDArray *target, ExtraArguments *extraParams); ////////////////////////////////////////////////////////////////////////// void NDArray::applyScalarArr(nd4j::scalar::BoolOps op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) const { @@ -3627,17 +3627,17 @@ void NDArray::applyScalar(nd4j::scalar::BoolOps op, const T scalar, NDArray *tar applyScalarArr(op, &scalarArr, target, extraParams); } -template <> void NDArray::applyScalar(nd4j::scalar::BoolOps op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) const { throw std::runtime_error("NDArray::applyScalar method: do not use me!");} -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const double scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const float scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const float16 scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const bfloat16 scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const Nd4jLong scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const int scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const int16_t scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const int8_t scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const uint8_t scalar, NDArray *target, ExtraArguments *extraParams) const; -template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const bool scalar, NDArray *target, ExtraArguments *extraParams) const; +template <> ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) const { throw std::runtime_error("NDArray::applyScalar method: do not use me!");} +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const double scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const float scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const float16 scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const bfloat16 scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const Nd4jLong scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const int scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const int16_t scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const int8_t scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const uint8_t scalar, NDArray *target, ExtraArguments *extraParams) const; +template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::BoolOps op, const bool scalar, NDArray *target, ExtraArguments *extraParams) const; ////////////////////////////////////////////////////////////////////////// @@ -3665,17 +3665,17 @@ template void NDArray::applyScalar(nd4j::scalar::BoolOps op, const bool sc applyScalarArr(op, &scalarArr, target, extraParams); } - template <> void NDArray::applyScalar(nd4j::scalar::IntOps op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) const { throw std::runtime_error("NDArray::applyScalar method: do not use me!");} - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const double scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const float scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const float16 scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const bfloat16 scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const Nd4jLong scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const int scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const int16_t scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const int8_t scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const uint8_t scalar, NDArray *target, ExtraArguments *extraParams) const; - template void NDArray::applyScalar(nd4j::scalar::IntOps op, const bool scalar, NDArray *target, ExtraArguments *extraParams) const; + template <> ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const NDArray* scalar, NDArray *target, ExtraArguments *extraParams) const { throw std::runtime_error("NDArray::applyScalar method: do not use me!");} + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const double scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const float scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const float16 scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const bfloat16 scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const Nd4jLong scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const int scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const int16_t scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const int8_t scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const uint8_t scalar, NDArray *target, ExtraArguments *extraParams) const; + template ND4J_EXPORT void NDArray::applyScalar(nd4j::scalar::IntOps op, const bool scalar, NDArray *target, ExtraArguments *extraParams) const; //////////////////////////////////////////////////////////////////////// @@ -3966,19 +3966,19 @@ void NDArray::p(const Nd4jLong i, const T value) { NDArray::registerPrimaryUse({this}, {}); } -template void NDArray::p(const Nd4jLong i, const double value); -template void NDArray::p(const Nd4jLong i, const float value); -template void NDArray::p(const Nd4jLong i, const float16 value); -template void NDArray::p(const Nd4jLong i, const bfloat16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong value); -template void NDArray::p(const Nd4jLong i, const int value); -template void NDArray::p(const Nd4jLong i, const int8_t value); -template void NDArray::p(const Nd4jLong i, const uint8_t value); -template void NDArray::p(const Nd4jLong i, const uint16_t value); -template void NDArray::p(const Nd4jLong i, const uint32_t value); -template void NDArray::p(const Nd4jLong i, const uint64_t value); -template void NDArray::p(const Nd4jLong i, const int16_t value); -template void NDArray::p(const Nd4jLong i, const bool value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const double value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const float value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const float16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const bfloat16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const int value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const int8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const uint8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const uint16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const uint32_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const uint64_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const int16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const bool value); ////////////////////////////////////////////////////////////////////////// // This method sets value in 2D matrix to position i, j @@ -3996,19 +3996,19 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const T value) { BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const double value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const float value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const float16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const bfloat16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const int value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const int8_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint8_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint16_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint32_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint64_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const int16_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const bool value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const double value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const float value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const float16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const bfloat16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const int value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const int8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint32_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const uint64_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const int16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const bool value); ////////////////////////////////////////////////////////////////////////// // This method sets value in 3D matrix to position i,j,k @@ -4026,19 +4026,19 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const T va BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const double value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const float value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const float16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const bfloat16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const int value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const int8_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint8_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint16_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint32_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint64_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const int16_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const bool value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const double value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const float value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const float16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const bfloat16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const int value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const int8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint32_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const uint64_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const int16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const bool value); ////////////////////////////////////////////////////////////////////////// template @@ -4055,19 +4055,19 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4j BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const double value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const float value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const float16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const bfloat16 value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const Nd4jLong value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const int value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const int8_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint8_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint16_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint32_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint64_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const int16_t value); -template void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const bool value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const double value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const float value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const float16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const bfloat16 value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const Nd4jLong value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const int value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const int8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint8_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint32_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const uint64_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const int16_t value); +template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const bool value); //////////////////////////////////////////////////////////////////////// void NDArray::p(const Nd4jLong i, const NDArray& scalar) { @@ -4256,7 +4256,7 @@ void NDArray::templatedAssign(void *xBuffer, Nd4jLong xOffset, const void *yBuff if (xBuffer != nullptr && yBuffer != nullptr) *(reinterpret_cast(xBuffer) + xOffset) = *(reinterpret_cast(yBuffer) + yOffset); } -BUILD_SINGLE_TEMPLATE(template void NDArray::templatedAssign, (void *xBuffer, const Nd4jLong xOffset, const void *yBuffer, const Nd4jLong yOffset) const, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedAssign, (void *xBuffer, const Nd4jLong xOffset, const void *yBuffer, const Nd4jLong yOffset) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/blas/cpu/NDArrayFactory.cpp b/libnd4j/blas/cpu/NDArrayFactory.cpp index b091f13b7..54cc6bba8 100644 --- a/libnd4j/blas/cpu/NDArrayFactory.cpp +++ b/libnd4j/blas/cpu/NDArrayFactory.cpp @@ -29,7 +29,7 @@ namespace nd4j { //////////////////////////////////////////////////////////////////////// template <> - NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context) { + ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context) { if ((int) shape.size() > MAX_RANK) throw std::invalid_argument("NDArrayFactory::create: rank of NDArray can't exceed 32 !"); @@ -71,8 +71,19 @@ namespace nd4j { NDArray result(buffer, descriptor, context); return result; - } + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::vector& data, nd4j::LaunchContext * context); NDArray NDArrayFactory::string(const char *str, nd4j::LaunchContext * context) { std::string s(str); @@ -118,7 +129,7 @@ template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, nd4j::LaunchContext * context) { return create_(order, shape, DataTypeUtils::fromT(), context); } -BUILD_SINGLE_TEMPLATE(template NDArray* NDArrayFactory::create_, (const char order, const std::vector &shape, nd4j::LaunchContext * context), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT NDArray* NDArrayFactory::create_, (const char order, const std::vector &shape, nd4j::LaunchContext * context), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// template @@ -128,20 +139,20 @@ void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector) { } template <> -void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector) { +void ND4J_EXPORT NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector) { auto p = reinterpret_cast(ptr); for (Nd4jLong e = 0; e < vector.size(); e++) p[e] = vector[e]; } -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); -template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); +template ND4J_EXPORT void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector &vector); #ifndef __JAVACPP_HACK__ @@ -150,16 +161,16 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector& shape, const T value, const char order, nd4j::LaunchContext * context) { return valueOf(std::vector(shape), value, order); } - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const double value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const float value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const float16 value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const bfloat16 value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const Nd4jLong value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const int value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const uint8_t value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const int8_t value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const int16_t value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const bool value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const double value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const float value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const float16 value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const bfloat16 value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const Nd4jLong value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const int value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const uint8_t value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const int8_t value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const int16_t value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::initializer_list& shape, const bool value, const char order, nd4j::LaunchContext * context); //////////////////////////////////////////////////////////////////////// template @@ -167,18 +178,18 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector vec(data); return create(order, shape, vec, context); } - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); - template NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray NDArrayFactory::create(const char order, const std::vector &shape, const std::initializer_list& data, nd4j::LaunchContext * context); #endif @@ -197,19 +208,19 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector NDArray NDArrayFactory::create(nd4j::DataType type, const T scalar, nd4j::LaunchContext * context) { @@ -223,20 +234,20 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector NDArray NDArrayFactory::create(const T scalar, nd4j::LaunchContext * context) { @@ -252,19 +263,19 @@ template void NDArrayFactory::memcpyFromVector(void *ptr, const std::vector & return new NDArray(NDArrayFactory::create(order, shape, data, context)); } -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); -template NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray* NDArrayFactory::create_(const char order, const std::vector &shape, const std::vector &data, nd4j::LaunchContext * context); //////////////////////////////////////////////////////////////////////// template <> - NDArray* NDArrayFactory::valueOf(const std::vector& shape, NDArray* value, const char order, nd4j::LaunchContext * context) { + ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, NDArray* value, const char order, nd4j::LaunchContext * context) { auto result = create_(order, shape, value->dataType(), context); result->assign(*value); return result; } template <> - NDArray* NDArrayFactory::valueOf(const std::vector& shape, NDArray& value, const char order, nd4j::LaunchContext * context) { + ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, NDArray& value, const char order, nd4j::LaunchContext * context) { auto result = create_(order, shape, value.dataType(), context); result->assign(value); return result; @@ -309,16 +320,16 @@ template NDArray* NDArrayFactory::create_(const char order, const std::vectorassign(value); return result; } - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const double value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const float value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const float16 value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const bfloat16 value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const Nd4jLong value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const int value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const int16_t value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const int8_t value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const uint8_t value, const char order, nd4j::LaunchContext * context); - template NDArray* NDArrayFactory::valueOf(const std::vector& shape, const bool value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const double value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const float value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const float16 value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const bfloat16 value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const Nd4jLong value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const int value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const int16_t value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const int8_t value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const uint8_t value, const char order, nd4j::LaunchContext * context); + template ND4J_EXPORT NDArray* NDArrayFactory::valueOf(const std::vector& shape, const bool value, const char order, nd4j::LaunchContext * context); //////////////////////////////////////////////////////////////////////// @@ -334,19 +345,19 @@ template NDArray* NDArrayFactory::create_(const char order, const std::vector @@ -363,19 +374,19 @@ template NDArray* NDArrayFactory::create_(const char order, const std::vector @@ -383,14 +394,14 @@ template NDArray* NDArrayFactory::create_(const char order, const std::vector vec(shape); return create(order, vec, context); } - BUILD_SINGLE_TEMPLATE(template NDArray NDArrayFactory::create, (const char, const std::initializer_list&, nd4j::LaunchContext * context), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT NDArray NDArrayFactory::create, (const char, const std::initializer_list&, nd4j::LaunchContext * context), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// template NDArray NDArrayFactory::create(const char order, const std::vector &shape, nd4j::LaunchContext * context) { return create(order, shape, DataTypeUtils::fromT(), context); } - BUILD_SINGLE_TEMPLATE(template NDArray NDArrayFactory::create, (const char order, const std::vector &shape, nd4j::LaunchContext * context), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT NDArray NDArrayFactory::create, (const char order, const std::vector &shape, nd4j::LaunchContext * context), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// NDArray NDArrayFactory::create(const char order, const std::vector &shape, nd4j::DataType dtype, nd4j::LaunchContext* context) { @@ -443,17 +454,17 @@ NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext return res; } -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(const std::vector &values, nd4j::LaunchContext * context); //////////////////////////////////////////////////////////////////////// template @@ -466,7 +477,7 @@ template NDArray NDArrayFactory::create(const std::vector &values, nd4j::L return result; } - BUILD_SINGLE_TEMPLATE(template NDArray* NDArrayFactory::empty_, (nd4j::LaunchContext * context), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT NDArray* NDArrayFactory::empty_, (nd4j::LaunchContext * context), LIBND4J_TYPES); NDArray* NDArrayFactory::empty_(nd4j::DataType dataType, nd4j::LaunchContext * context) { if (context == nullptr) @@ -486,7 +497,7 @@ template NDArray NDArrayFactory::create(const std::vector &values, nd4j::L NDArray NDArrayFactory::empty(nd4j::LaunchContext * context) { return empty(DataTypeUtils::fromT(), context); } - BUILD_SINGLE_TEMPLATE(template NDArray NDArrayFactory::empty, (nd4j::LaunchContext * context), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT NDArray NDArrayFactory::empty, (nd4j::LaunchContext * context), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// NDArray NDArrayFactory::empty(nd4j::DataType dataType, nd4j::LaunchContext * context) { @@ -529,16 +540,16 @@ NDArray NDArrayFactory::create(T* buffer, const char order, const std::initializ return result; } -template NDArray NDArrayFactory::create(double* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(float* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(float16* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(bfloat16* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(Nd4jLong * buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(int* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(bool* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(uint8_t * buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(int8_t* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); -template NDArray NDArrayFactory::create(int16_t* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(double* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(float* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(float16* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(bfloat16* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(Nd4jLong * buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(int* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(bool* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(uint8_t * buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(int8_t* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); +template ND4J_EXPORT NDArray NDArrayFactory::create(int16_t* buffer, const char order, const std::initializer_list& shape, nd4j::LaunchContext * context); NDArray NDArrayFactory::string(char order, const std::vector &shape, const std::initializer_list &strings, nd4j::LaunchContext * context) { diff --git a/libnd4j/blas/cuda/NDArray.cu b/libnd4j/blas/cuda/NDArray.cu index f70760f9a..be90a22ae 100644 --- a/libnd4j/blas/cuda/NDArray.cu +++ b/libnd4j/blas/cuda/NDArray.cu @@ -150,7 +150,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, const char manager.synchronize(); } -BUILD_SINGLE_TEMPLATE(template void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// template diff --git a/libnd4j/buildnativeoperations.sh b/libnd4j/buildnativeoperations.sh index 56e225a5d..351a4f8e2 100755 --- a/libnd4j/buildnativeoperations.sh +++ b/libnd4j/buildnativeoperations.sh @@ -168,140 +168,133 @@ fi case "$OS" in linux-armhf) - export RPI_BIN=$RPI_HOME/tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin/arm-linux-gnueabihf - export CMAKE_COMMAND="$CMAKE_COMMAND -D CMAKE_TOOLCHAIN_FILE=cmake/rpi.cmake" - if [ -z "$ARCH" ]; then + export RPI_BIN=$RPI_HOME/tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin/arm-linux-gnueabihf + export CMAKE_COMMAND="$CMAKE_COMMAND -D CMAKE_TOOLCHAIN_FILE=cmake/rpi.cmake" + if [ -z "$ARCH" ]; then ARCH="armv7-r" - fi + fi ;; linux-arm64) - if [ -z "$ARCH" ]; then + if [ -z "$ARCH" ]; then ARCH="armv8-a" - fi + fi ;; android-arm) - if [ -z "$ARCH" ]; then + if [ -z "$ARCH" ]; then ARCH="armv7-a" - fi - export ANDROID_BIN="$ANDROID_NDK/toolchains/arm-linux-androideabi-4.9/prebuilt/$KERNEL/" - export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" - export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" - export ANDROID_ROOT="$ANDROID_NDK/platforms/android-14/arch-arm/" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm.cmake -DANDROID_BUILD=true" + fi + export ANDROID_BIN="$ANDROID_NDK/toolchains/arm-linux-androideabi-4.9/prebuilt/$KERNEL/" + export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" + export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" + export ANDROID_ROOT="$ANDROID_NDK/platforms/android-14/arch-arm/" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm.cmake -DANDROID_BUILD=true" ;; android-arm64) - if [ -z "$ARCH" ]; then + if [ -z "$ARCH" ]; then ARCH="armv8-a" - fi - export ANDROID_BIN="$ANDROID_NDK/toolchains/aarch64-linux-android-4.9/prebuilt/$KERNEL/" - export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" - export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" - export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-arm64/" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm64.cmake -DANDROID_BUILD=true" + fi + export ANDROID_BIN="$ANDROID_NDK/toolchains/aarch64-linux-android-4.9/prebuilt/$KERNEL/" + export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" + export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" + export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-arm64/" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm64.cmake -DANDROID_BUILD=true" ;; android-x86) - if [ -z "$ARCH" ]; then + if [ -z "$ARCH" ]; then ARCH="i686" - fi - export ANDROID_BIN="$ANDROID_NDK/toolchains/x86-4.9/prebuilt/$KERNEL/" - export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" - export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" - export ANDROID_ROOT="$ANDROID_NDK/platforms/android-14/arch-x86/" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-x86.cmake -DANDROID_BUILD=true" + fi + export ANDROID_BIN="$ANDROID_NDK/toolchains/x86-4.9/prebuilt/$KERNEL/" + export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" + export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" + export ANDROID_ROOT="$ANDROID_NDK/platforms/android-14/arch-x86/" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-x86.cmake -DANDROID_BUILD=true" ;; android-x86_64) - if [ -z "$ARCH" ]; then + if [ -z "$ARCH" ]; then ARCH="x86-64" - fi - export ANDROID_BIN="$ANDROID_NDK/toolchains/x86_64-4.9/prebuilt/$KERNEL/" - export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" - export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" - export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-x86_64/" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-x86_64.cmake -DANDROID_BUILD=true" + fi + export ANDROID_BIN="$ANDROID_NDK/toolchains/x86_64-4.9/prebuilt/$KERNEL/" + export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/" + export ANDROID_LLVM="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/" + export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-x86_64/" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-x86_64.cmake -DANDROID_BUILD=true" ;; ios-x86_64) - LIBTYPE="static" - ARCH="x86-64" - if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then - export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" - else + LIBTYPE="static" + ARCH="x86-64" + if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then + export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" + else export IOS_VERSION="10.3" - fi - XCODE_PATH="$(xcode-select --print-path)" - export IOS_SDK="$XCODE_PATH/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator$IOS_VERSION.sdk" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-x86_64.cmake --debug-trycompile -DIOS_BUILD=true" + fi + XCODE_PATH="$(xcode-select --print-path)" + export IOS_SDK="$XCODE_PATH/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator$IOS_VERSION.sdk" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-x86_64.cmake --debug-trycompile -DIOS_BUILD=true" ;; ios-x86) - LIBTYPE="static" - ARCH="i386" - if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then - export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" - else + LIBTYPE="static" + ARCH="i386" + if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then + export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" + else export IOS_VERSION="10.3" - fi - XCODE_PATH="$(xcode-select --print-path)" - export IOS_SDK="$XCODE_PATH/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator$IOS_VERSION.sdk" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-x86.cmake --debug-trycompile -DIOS_BUILD=true" + fi + XCODE_PATH="$(xcode-select --print-path)" + export IOS_SDK="$XCODE_PATH/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator$IOS_VERSION.sdk" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-x86.cmake --debug-trycompile -DIOS_BUILD=true" ;; ios-arm64) - LIBTYPE="static" - ARCH="arm64" - if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then - export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" - else + LIBTYPE="static" + ARCH="arm64" + if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then + export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" + else export IOS_VERSION="10.3" - fi - XCODE_PATH="$(xcode-select --print-path)" - export IOS_SDK="$XCODE_PATH/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS$IOS_VERSION.sdk" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-arm64.cmake --debug-trycompile -DIOS_BUILD=true" + fi + XCODE_PATH="$(xcode-select --print-path)" + export IOS_SDK="$XCODE_PATH/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS$IOS_VERSION.sdk" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-arm64.cmake --debug-trycompile -DIOS_BUILD=true" ;; ios-arm) - LIBTYPE="static" - ARCH="armv7" - if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then - export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" - else + LIBTYPE="static" + ARCH="armv7" + if xcrun --sdk iphoneos --show-sdk-version &> /dev/null; then + export IOS_VERSION="$(xcrun --sdk iphoneos --show-sdk-version)" + else export IOS_VERSION="10.3" - fi - XCODE_PATH="$(xcode-select --print-path)" - export IOS_SDK="$XCODE_PATH/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS$IOS_VERSION.sdk" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-arm.cmake --debug-trycompile -DIOS_BUILD=true" + fi + XCODE_PATH="$(xcode-select --print-path)" + export IOS_SDK="$XCODE_PATH/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS$IOS_VERSION.sdk" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-arm.cmake --debug-trycompile -DIOS_BUILD=true" ;; ios-armv7) - # change those 2 parameters and make sure the IOS_SDK exists - export iPhoneOS="iPhoneOS" - export IOS_VERSION="10.3" - LIBTYPE="static" - ARCH="armv7" - export IOS_SDK="/Applications/Xcode.app/Contents/Developer/Platforms/${iPhoneOS}.platform/Developer/SDKs/${iPhoneOS}${IOS_VERSION}.sdk" - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-armv7.cmake --debug-trycompile -DIOS_BUILD=true" + # change those 2 parameters and make sure the IOS_SDK exists + export iPhoneOS="iPhoneOS" + export IOS_VERSION="10.3" + LIBTYPE="static" + ARCH="armv7" + export IOS_SDK="/Applications/Xcode.app/Contents/Developer/Platforms/${iPhoneOS}.platform/Developer/SDKs/${iPhoneOS}${IOS_VERSION}.sdk" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/ios-armv7.cmake --debug-trycompile -DIOS_BUILD=true" ;; linux*) ;; macosx*) - # Do something under Mac OS X platform - #if [ "$CHIP" == "cuda" ]; then - export CC=clang - export CXX=clang++ - PARALLEL="true" - #else - # export CC="$(ls -1 /usr/local/bin/gcc-? | head -n 1)" - # export CXX="$(ls -1 /usr/local/bin/g++-? | head -n 1)" - # PARALLEL="true" - #fi - export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_MACOSX_RPATH=ON -DAPPLE_BUILD=true" + export CC=clang + export CXX=clang++ + PARALLEL="true" + export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_MACOSX_RPATH=ON -DAPPLE_BUILD=true" ;; windows*) diff --git a/libnd4j/include/array/impl/ExtraArguments.cpp b/libnd4j/include/array/impl/ExtraArguments.cpp index 55cda66b0..f9174ea0f 100644 --- a/libnd4j/include/array/impl/ExtraArguments.cpp +++ b/libnd4j/include/array/impl/ExtraArguments.cpp @@ -89,7 +89,7 @@ namespace nd4j { delete[] target; #endif } - BUILD_SINGLE_TEMPLATE(template void ExtraArguments::convertAndCopy, (Nd4jPointer pointer, Nd4jLong offset), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void ExtraArguments::convertAndCopy, (Nd4jPointer pointer, Nd4jLong offset), LIBND4J_TYPES); void* ExtraArguments::allocate(size_t length, size_t elementSize) { #ifdef __CUDABLAS__ @@ -119,7 +119,7 @@ namespace nd4j { void* ExtraArguments::argumentsAsT(Nd4jLong offset) { return argumentsAsT(DataTypeUtils::fromT(), offset); } - BUILD_SINGLE_TEMPLATE(template void *ExtraArguments::argumentsAsT, (Nd4jLong offset), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void *ExtraArguments::argumentsAsT, (Nd4jLong offset), LIBND4J_TYPES); void* ExtraArguments::argumentsAsT(nd4j::DataType dataType, Nd4jLong offset) { diff --git a/libnd4j/include/exceptions/allocation_exception.h b/libnd4j/include/exceptions/allocation_exception.h index 23c53d166..29756d253 100644 --- a/libnd4j/include/exceptions/allocation_exception.h +++ b/libnd4j/include/exceptions/allocation_exception.h @@ -24,9 +24,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class allocation_exception : public std::runtime_error { + class ND4J_EXPORT allocation_exception : public std::runtime_error { public: allocation_exception(std::string message); ~allocation_exception() = default; diff --git a/libnd4j/include/exceptions/cuda_exception.h b/libnd4j/include/exceptions/cuda_exception.h index 3f6fce4d5..5150033e8 100644 --- a/libnd4j/include/exceptions/cuda_exception.h +++ b/libnd4j/include/exceptions/cuda_exception.h @@ -23,9 +23,17 @@ #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class cuda_exception : public std::runtime_error { + class ND4J_EXPORT cuda_exception : public std::runtime_error { public: cuda_exception(std::string message); ~cuda_exception() = default; diff --git a/libnd4j/include/exceptions/datatype_exception.h b/libnd4j/include/exceptions/datatype_exception.h index 05e8ae14a..171a2b13b 100644 --- a/libnd4j/include/exceptions/datatype_exception.h +++ b/libnd4j/include/exceptions/datatype_exception.h @@ -24,9 +24,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class datatype_exception : public std::runtime_error { + class ND4J_EXPORT datatype_exception : public std::runtime_error { public: datatype_exception(std::string message); ~datatype_exception() = default; diff --git a/libnd4j/include/exceptions/graph_exception.h b/libnd4j/include/exceptions/graph_exception.h index 6daf833cf..440fa5aa4 100644 --- a/libnd4j/include/exceptions/graph_exception.h +++ b/libnd4j/include/exceptions/graph_exception.h @@ -24,9 +24,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class graph_exception : public std::runtime_error { + class ND4J_EXPORT graph_exception : public std::runtime_error { protected: Nd4jLong _graphId; std::string _message; diff --git a/libnd4j/include/exceptions/graph_execution_exception.h b/libnd4j/include/exceptions/graph_execution_exception.h index 03f0a37e4..92b02e2ee 100644 --- a/libnd4j/include/exceptions/graph_execution_exception.h +++ b/libnd4j/include/exceptions/graph_execution_exception.h @@ -25,9 +25,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class graph_execution_exception: public graph_exception { + class ND4J_EXPORT graph_execution_exception: public graph_exception { public: explicit graph_execution_exception(Nd4jLong graphId); }; diff --git a/libnd4j/include/exceptions/graph_exists_exception.h b/libnd4j/include/exceptions/graph_exists_exception.h index 355518d02..985770ad3 100644 --- a/libnd4j/include/exceptions/graph_exists_exception.h +++ b/libnd4j/include/exceptions/graph_exists_exception.h @@ -25,9 +25,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class graph_exists_exception: public graph_exception { + class ND4J_EXPORT graph_exists_exception: public graph_exception { public: explicit graph_exists_exception(Nd4jLong graphId); }; diff --git a/libnd4j/include/exceptions/no_results_exception.h b/libnd4j/include/exceptions/no_results_exception.h index f7673ed0c..0fa1bb167 100644 --- a/libnd4j/include/exceptions/no_results_exception.h +++ b/libnd4j/include/exceptions/no_results_exception.h @@ -25,9 +25,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class no_results_exception: public graph_exception { + class ND4J_EXPORT no_results_exception: public graph_exception { public: explicit no_results_exception(Nd4jLong graphId); }; diff --git a/libnd4j/include/exceptions/unknown_graph_exception.h b/libnd4j/include/exceptions/unknown_graph_exception.h index 90d9d8e2e..83efc9dcf 100644 --- a/libnd4j/include/exceptions/unknown_graph_exception.h +++ b/libnd4j/include/exceptions/unknown_graph_exception.h @@ -25,9 +25,17 @@ #include #include #include +#include + +#if defined(_MSC_VER) + +// we're ignoring warning about non-exportable parent class, since std::runtime_error is a part of Standard C++ Library +#pragma warning( disable : 4275 ) + +#endif namespace nd4j { - class unknown_graph_exception: public graph_exception { + class ND4J_EXPORT unknown_graph_exception: public graph_exception { public: explicit unknown_graph_exception(Nd4jLong graphId); }; diff --git a/libnd4j/include/execution/ThreadPool.h b/libnd4j/include/execution/ThreadPool.h index e17b4b540..6811f1b1c 100644 --- a/libnd4j/include/execution/ThreadPool.h +++ b/libnd4j/include/execution/ThreadPool.h @@ -33,7 +33,7 @@ #include namespace samediff { - class ThreadPool { + class ND4J_EXPORT ThreadPool { private: static ThreadPool* _INSTANCE; diff --git a/libnd4j/include/execution/Threads.h b/libnd4j/include/execution/Threads.h index be12a311a..14467883f 100644 --- a/libnd4j/include/execution/Threads.h +++ b/libnd4j/include/execution/Threads.h @@ -27,7 +27,7 @@ #include namespace samediff { - class ThreadsHelper { + class ND4J_EXPORT ThreadsHelper { public: static int numberOfThreads(int maxThreads, uint64_t numberOfElements); static int numberOfThreads2d(int maxThreads, uint64_t iters_x, uint64_t iters_y); @@ -36,7 +36,7 @@ namespace samediff { static int pickLoop3d(int numThreads, uint64_t iters_x, uint64_t iters_y, uint64_t iters_z); }; - class Span { + class ND4J_EXPORT Span { private: int64_t _startX, _stopX, _incX; public: @@ -50,7 +50,7 @@ namespace samediff { static Span build(uint64_t thread_id, uint64_t num_threads, int64_t start_x, int64_t stop_x, int64_t inc_x); }; - class Span2 { + class ND4J_EXPORT Span2 { private: int64_t _startX, _stopX, _incX; int64_t _startY, _stopY, _incY; @@ -70,7 +70,7 @@ namespace samediff { static Span2 build(int loop, uint64_t thread_id, uint64_t num_threads, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y); }; - class Span3 { + class ND4J_EXPORT Span3 { private: int64_t _startX, _stopX, _incX; int64_t _startY, _stopY, _incY; @@ -94,7 +94,7 @@ namespace samediff { static Span3 build(int loop, uint64_t thread_id, uint64_t num_threads, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z); }; - class Threads { + class ND4J_EXPORT Threads { public: /** * This function executes 1 dimensional loop for a given number of threads diff --git a/libnd4j/include/execution/Ticket.h b/libnd4j/include/execution/Ticket.h index e4152b66a..80bf54145 100644 --- a/libnd4j/include/execution/Ticket.h +++ b/libnd4j/include/execution/Ticket.h @@ -29,7 +29,7 @@ #include namespace samediff { - class Ticket { + class ND4J_EXPORT Ticket { private: bool _acquired = false; std::vector*> _queues; diff --git a/libnd4j/include/graph/Variable.h b/libnd4j/include/graph/Variable.h index 2e0053176..60f977e97 100644 --- a/libnd4j/include/graph/Variable.h +++ b/libnd4j/include/graph/Variable.h @@ -64,7 +64,7 @@ namespace nd4j { Variable* clone(); template - Variable* asT(); + ND4J_EXPORT Variable* asT(); bool hasNDArray(); nd4j::NDArray* getNDArray(); diff --git a/libnd4j/include/graph/impl/Node.cpp b/libnd4j/include/graph/impl/Node.cpp index 795d9b7f0..9d2224d2f 100644 --- a/libnd4j/include/graph/impl/Node.cpp +++ b/libnd4j/include/graph/impl/Node.cpp @@ -311,7 +311,7 @@ namespace nd4j { node->_dataType = DataTypeUtils::fromT(); return node; } - BUILD_SINGLE_TEMPLATE(template Node* Node::asT, (), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT Node* Node::asT, (), LIBND4J_TYPES); nd4j::graph::Node::Node(nd4j::ops::DeclarableOp *customOp, int id, std::initializer_list input, std::initializer_list output, std::initializer_list dimensions, float scalar, std::initializer_list tArgs, std::initializer_list iArgs) { this->_opType = OpType_CUSTOM; diff --git a/libnd4j/include/graph/impl/Variable.cpp b/libnd4j/include/graph/impl/Variable.cpp index e54112783..d77bded2e 100644 --- a/libnd4j/include/graph/impl/Variable.cpp +++ b/libnd4j/include/graph/impl/Variable.cpp @@ -50,7 +50,7 @@ namespace nd4j { return result; } - BUILD_SINGLE_TEMPLATE(template Variable* Variable::asT, (), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT Variable* Variable::asT, (), LIBND4J_TYPES); nd4j::graph::Variable* nd4j::graph::Variable::clone() { auto result = new Variable(this->isPlaceholder()); diff --git a/libnd4j/include/helpers/AttentionHelper.h b/libnd4j/include/helpers/AttentionHelper.h index a04b26ac8..186f959fd 100644 --- a/libnd4j/include/helpers/AttentionHelper.h +++ b/libnd4j/include/helpers/AttentionHelper.h @@ -24,7 +24,7 @@ #include "NDArray.h" namespace nd4j { - class AttentionHelper { + class ND4J_EXPORT AttentionHelper { public: static nd4j::NDArray multiHeadProject(const nd4j::NDArray* input, const nd4j::NDArray* projectionMatrix, nd4j::LaunchContext * context = nd4j::LaunchContext ::defaultContext()); diff --git a/libnd4j/include/helpers/BenchmarkHelper.h b/libnd4j/include/helpers/BenchmarkHelper.h index 58ed7e1b7..8dc946a2a 100644 --- a/libnd4j/include/helpers/BenchmarkHelper.h +++ b/libnd4j/include/helpers/BenchmarkHelper.h @@ -44,7 +44,7 @@ namespace nd4j { - class BenchmarkHelper { + class ND4J_EXPORT BenchmarkHelper { private: unsigned int _wIterations; unsigned int _rIterations; diff --git a/libnd4j/include/helpers/BitwiseUtils.h b/libnd4j/include/helpers/BitwiseUtils.h index e8990a34d..6defc4c49 100644 --- a/libnd4j/include/helpers/BitwiseUtils.h +++ b/libnd4j/include/helpers/BitwiseUtils.h @@ -28,7 +28,7 @@ #include namespace nd4j { - class BitwiseUtils { + class ND4J_EXPORT BitwiseUtils { public: diff --git a/libnd4j/include/helpers/CudaLaunchHelper.h b/libnd4j/include/helpers/CudaLaunchHelper.h index c8c22383c..9fec14764 100644 --- a/libnd4j/include/helpers/CudaLaunchHelper.h +++ b/libnd4j/include/helpers/CudaLaunchHelper.h @@ -28,7 +28,7 @@ #include namespace nd4j { - class CudaLaunchHelper { + class ND4J_EXPORT CudaLaunchHelper { public: static Triple getFlatLaunchParams(Nd4jLong length, int SM, int CORES, int SHARED_MEMORY); static int getReductionBlocks(Nd4jLong xLength, int blockSize = 512); diff --git a/libnd4j/include/helpers/DebugHelper.h b/libnd4j/include/helpers/DebugHelper.h index a932ac759..945bebe8e 100644 --- a/libnd4j/include/helpers/DebugHelper.h +++ b/libnd4j/include/helpers/DebugHelper.h @@ -40,7 +40,7 @@ #include namespace nd4j { class NDArray; - class DebugHelper { + class ND4J_EXPORT DebugHelper { public: // cuda-specific debug functions diff --git a/libnd4j/include/helpers/GradCheck.h b/libnd4j/include/helpers/GradCheck.h index cda0b5eae..32f66109a 100644 --- a/libnd4j/include/helpers/GradCheck.h +++ b/libnd4j/include/helpers/GradCheck.h @@ -27,7 +27,7 @@ namespace nd4j { -class GradCheck { +class ND4J_EXPORT GradCheck { public: enum LossFunc {MEAN = 0, SUM = 1}; diff --git a/libnd4j/include/helpers/MmulHelper.h b/libnd4j/include/helpers/MmulHelper.h index ff0a7d1b2..76244d050 100644 --- a/libnd4j/include/helpers/MmulHelper.h +++ b/libnd4j/include/helpers/MmulHelper.h @@ -25,7 +25,7 @@ #include "NDArray.h" namespace nd4j { - class MmulHelper { + class ND4J_EXPORT MmulHelper { private: diff --git a/libnd4j/include/helpers/OmpLaunchHelper.h b/libnd4j/include/helpers/OmpLaunchHelper.h index 1001d6163..dac93cbe2 100644 --- a/libnd4j/include/helpers/OmpLaunchHelper.h +++ b/libnd4j/include/helpers/OmpLaunchHelper.h @@ -28,7 +28,7 @@ namespace nd4j { -class OmpLaunchHelper { +class ND4J_EXPORT OmpLaunchHelper { public: diff --git a/libnd4j/include/helpers/PointersManager.h b/libnd4j/include/helpers/PointersManager.h index b0cc931ff..50fdbccf9 100644 --- a/libnd4j/include/helpers/PointersManager.h +++ b/libnd4j/include/helpers/PointersManager.h @@ -30,7 +30,7 @@ namespace nd4j { -class PointersManager { +class ND4J_EXPORT PointersManager { private: diff --git a/libnd4j/include/helpers/RandomLauncher.h b/libnd4j/include/helpers/RandomLauncher.h index 24921dc21..2e477e079 100644 --- a/libnd4j/include/helpers/RandomLauncher.h +++ b/libnd4j/include/helpers/RandomLauncher.h @@ -24,7 +24,7 @@ #include namespace nd4j { - class RandomLauncher { + class ND4J_EXPORT RandomLauncher { public: static void applyDropOut(nd4j::LaunchContext *context, nd4j::graph::RandomGenerator& rng, NDArray *array, double retainProb, NDArray* z = nullptr); static void applyInvertedDropOut(nd4j::LaunchContext *context, nd4j::graph::RandomGenerator& rng, NDArray *array, double retainProb, NDArray* z = nullptr); diff --git a/libnd4j/include/helpers/ShapeUtils.h b/libnd4j/include/helpers/ShapeUtils.h index 74719dabb..5f76c11b5 100644 --- a/libnd4j/include/helpers/ShapeUtils.h +++ b/libnd4j/include/helpers/ShapeUtils.h @@ -26,7 +26,7 @@ namespace nd4j { - class ShapeUtils { + class ND4J_EXPORT ShapeUtils { public: diff --git a/libnd4j/include/helpers/SimpleReadWriteLock.h b/libnd4j/include/helpers/SimpleReadWriteLock.h index cb82e7348..b7637f355 100644 --- a/libnd4j/include/helpers/SimpleReadWriteLock.h +++ b/libnd4j/include/helpers/SimpleReadWriteLock.h @@ -23,6 +23,7 @@ #include #include +#include /** * This class provides PRIMITIVE read-write lock, and should NOT be used outside of GraphServer due to its inefficiency. @@ -31,7 +32,7 @@ * Basic idea: write lock won't be obtained before all read requests served */ namespace nd4j { - class SimpleReadWriteLock { + class ND4J_EXPORT SimpleReadWriteLock { private: std::atomic _read_locks; std::atomic _write_locks; diff --git a/libnd4j/include/helpers/StringUtils.h b/libnd4j/include/helpers/StringUtils.h index 9891661ad..1a450450f 100644 --- a/libnd4j/include/helpers/StringUtils.h +++ b/libnd4j/include/helpers/StringUtils.h @@ -27,7 +27,7 @@ #include namespace nd4j { - class StringUtils { + class ND4J_EXPORT StringUtils { public: template static FORCEINLINE std::string valueToString(T value) { diff --git a/libnd4j/include/helpers/cublasHelper.h b/libnd4j/include/helpers/cublasHelper.h index 94cd2446b..53d30abf6 100644 --- a/libnd4j/include/helpers/cublasHelper.h +++ b/libnd4j/include/helpers/cublasHelper.h @@ -27,7 +27,7 @@ #include namespace nd4j { - class CublasHelper { + class ND4J_EXPORT CublasHelper { private: static CublasHelper *_INSTANCE; static std::mutex _mutex; diff --git a/libnd4j/include/memory/MemoryRegistrator.h b/libnd4j/include/memory/MemoryRegistrator.h index 8bf5918a6..53e97d35e 100644 --- a/libnd4j/include/memory/MemoryRegistrator.h +++ b/libnd4j/include/memory/MemoryRegistrator.h @@ -24,10 +24,11 @@ #include "Workspace.h" #include #include +#include namespace nd4j { namespace memory { - class MemoryRegistrator { + class ND4J_EXPORT MemoryRegistrator { protected: static MemoryRegistrator* _INSTANCE; Workspace* _workspace; diff --git a/libnd4j/include/memory/MemoryReport.h b/libnd4j/include/memory/MemoryReport.h index 863c439ee..636178d45 100644 --- a/libnd4j/include/memory/MemoryReport.h +++ b/libnd4j/include/memory/MemoryReport.h @@ -22,10 +22,11 @@ #define LIBND4J_MEMORYREPORT_H #include +#include namespace nd4j { namespace memory { - class MemoryReport { + class ND4J_EXPORT MemoryReport { private: Nd4jLong _vm = 0; Nd4jLong _rss = 0; diff --git a/libnd4j/include/memory/MemoryUtils.h b/libnd4j/include/memory/MemoryUtils.h index 985ca466d..5fe27898c 100644 --- a/libnd4j/include/memory/MemoryUtils.h +++ b/libnd4j/include/memory/MemoryUtils.h @@ -22,10 +22,11 @@ #define LIBND4J_MEMORYUTILS_H #include "MemoryReport.h" +#include namespace nd4j { namespace memory { - class MemoryUtils { + class ND4J_EXPORT MemoryUtils { public: static bool retrieveMemoryStatistics(MemoryReport& report); }; diff --git a/libnd4j/include/ops/BroadcastBoolOpsTuple.h b/libnd4j/include/ops/BroadcastBoolOpsTuple.h index 9bffc1198..7b0f96505 100644 --- a/libnd4j/include/ops/BroadcastBoolOpsTuple.h +++ b/libnd4j/include/ops/BroadcastBoolOpsTuple.h @@ -22,9 +22,10 @@ #define DEV_TESTS_BROADCASTBOOLOPSTUPLE_H #include +#include namespace nd4j { - class BroadcastBoolOpsTuple { + class ND4J_EXPORT BroadcastBoolOpsTuple { private: public: diff --git a/libnd4j/include/ops/BroadcastIntOpsTuple.h b/libnd4j/include/ops/BroadcastIntOpsTuple.h index df40907a9..c96244b1a 100644 --- a/libnd4j/include/ops/BroadcastIntOpsTuple.h +++ b/libnd4j/include/ops/BroadcastIntOpsTuple.h @@ -22,9 +22,10 @@ #define DEV_TESTS_BROADCASTINTOPSTUPLE_H #include +#include namespace nd4j { - class BroadcastIntOpsTuple { + class ND4J_EXPORT BroadcastIntOpsTuple { private: public: diff --git a/libnd4j/include/ops/BroadcastOpsTuple.h b/libnd4j/include/ops/BroadcastOpsTuple.h index 0450e50ab..256e37341 100644 --- a/libnd4j/include/ops/BroadcastOpsTuple.h +++ b/libnd4j/include/ops/BroadcastOpsTuple.h @@ -22,9 +22,10 @@ #define DEV_TESTS_BROADCASTOPSTUPLE_H #include +#include namespace nd4j { - class BroadcastOpsTuple { + class ND4J_EXPORT BroadcastOpsTuple { private: public: diff --git a/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h b/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h index 997857bf3..5e91641ca 100644 --- a/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h +++ b/libnd4j/include/ops/declarable/generic/helpers/BroadcastHelper.h @@ -108,6 +108,9 @@ namespace nd4j { if (!x->isScalar() && !y->isScalar() && x->isSameShape(y)) { x->applyPairwiseTransform(op.p, y, z, nullptr); + } else if (ShapeUtils::areShapesBroadcastable(*x, *y)) { + x->applyTrueBroadcast(op, y, z, true, extraArgs); + return z; } else if (!x->isScalar() && y->isScalar()) { x->applyScalarArr(op.s, const_cast(y), z); } else if (x->isScalar() && !y->isScalar()) { diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp index cc11eedca..d790dd9c2 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp @@ -32,6 +32,10 @@ CONFIGURABLE_OP_IMPL(adjust_contrast, 1, 1, true, -2, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); + // just skip op if input is empty + if (input->isEmpty()) + return Status::OK(); + REQUIRE_TRUE(block.numT() > 0 || block.width() > 1, 0, "ADJUST_CONTRAST: Scale factor required"); const double factor = block.width() > 1 ? INPUT_VARIABLE(1)->e(0) : T_ARG(0); @@ -70,6 +74,10 @@ DECLARE_TYPES(adjust_contrast) { const double factor = block.width() > 1 ? INPUT_VARIABLE(1)->e(0) : T_ARG(0); + // just skip op if input is empty + if (input->isEmpty()) + return Status::OK(); + REQUIRE_TRUE(input->rankOf() > 2, 0, "ADJUST_CONTRAST_V2: op expects rank of input array to be >= 3, but got %i instead", input->rankOf()); REQUIRE_TRUE(input->sizeAt(-1) == 3, 0, "ADJUST_CONTRAST_V2: operation expects image with 3 channels (R, G, B), but got %i instead", input->sizeAt(-1)); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_hue.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_hue.cpp index 16062769a..d1d81acf8 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_hue.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_hue.cpp @@ -35,6 +35,10 @@ CONFIGURABLE_OP_IMPL(adjust_hue, 1, 1, true, 1, -2) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); + // just skip op if input is empty + if (input->isEmpty()) + return Status::OK(); + const int rank = input->rankOf(); const int dimC = block.getIArguments()->size() > 0 ? (INT_ARG(0) >= 0 ? INT_ARG(0) : INT_ARG(0) + rank) : rank - 1; const double delta = T_ARG(0); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_saturation.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_saturation.cpp index b4472bef5..5030e5952 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_saturation.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_saturation.cpp @@ -33,6 +33,10 @@ CONFIGURABLE_OP_IMPL(adjust_saturation, 1, 1, true, 1, -2) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); + // just skip op if input is empty + if (input->isEmpty()) + return Status::OK(); + const int rank = input->rankOf(); const int dimC = block.getIArguments()->size() > 0 ? (INT_ARG(0) >= 0 ? INT_ARG(0) : INT_ARG(0) + rank) : rank - 1; const double factor = T_ARG(0); diff --git a/libnd4j/include/ops/declarable/generic/shape/create.cpp b/libnd4j/include/ops/declarable/generic/shape/create.cpp index e743a5cad..c87f63a56 100644 --- a/libnd4j/include/ops/declarable/generic/shape/create.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/create.cpp @@ -25,6 +25,7 @@ namespace nd4j { namespace ops { + CUSTOM_OP_IMPL(create, 1, 1, false, 0, 1) { auto init = block.numB() > 0 ? B_ARG(0) : true; diff --git a/libnd4j/include/ops/declarable/helpers/activations.h b/libnd4j/include/ops/declarable/helpers/activations.h index 67d80f3c2..331170369 100644 --- a/libnd4j/include/ops/declarable/helpers/activations.h +++ b/libnd4j/include/ops/declarable/helpers/activations.h @@ -27,23 +27,23 @@ namespace nd4j { namespace ops { namespace helpers { - void softMaxForVector(nd4j::LaunchContext * context, const NDArray &input, NDArray &output); + ND4J_EXPORT void softMaxForVector(nd4j::LaunchContext * context, const NDArray &input, NDArray &output); - void logSoftMaxForVector(nd4j::LaunchContext * context, const NDArray &input, NDArray &output); + ND4J_EXPORT void logSoftMaxForVector(nd4j::LaunchContext * context, const NDArray &input, NDArray &output); - void softmax(nd4j::LaunchContext * context, const NDArray &input, NDArray &output, const int dimension); + ND4J_EXPORT void softmax(nd4j::LaunchContext * context, const NDArray &input, NDArray &output, const int dimension); - void logSoftmax(nd4j::LaunchContext * context, const NDArray &input, NDArray &output, const int dimension); + ND4J_EXPORT void logSoftmax(nd4j::LaunchContext * context, const NDArray &input, NDArray &output, const int dimension); - void softmaxDerivative(nd4j::LaunchContext * context, const NDArray& input, NDArray& output, const int dimension); + ND4J_EXPORT void softmaxDerivative(nd4j::LaunchContext * context, const NDArray& input, NDArray& output, const int dimension); - void prelu(nd4j::LaunchContext * context, const NDArray &input, const NDArray &alpha, NDArray &output); + ND4J_EXPORT void prelu(nd4j::LaunchContext * context, const NDArray &input, const NDArray &alpha, NDArray &output); - void preluBP(nd4j::LaunchContext * context, const NDArray &input, const NDArray &alpha, const NDArray &dLdO, NDArray &dLdI, NDArray &dLdA); + ND4J_EXPORT void preluBP(nd4j::LaunchContext * context, const NDArray &input, const NDArray &alpha, const NDArray &dLdO, NDArray &dLdI, NDArray &dLdA); - void thresholdRelu(nd4j::LaunchContext * context, const NDArray &input, double threshold, NDArray &output); + ND4J_EXPORT void thresholdRelu(nd4j::LaunchContext * context, const NDArray &input, double threshold, NDArray &output); - void thresholdReluDerivative(nd4j::LaunchContext * context, NDArray *input, double threshold, NDArray* dLdO, NDArray *output); + ND4J_EXPORT void thresholdReluDerivative(nd4j::LaunchContext * context, NDArray *input, double threshold, NDArray* dLdO, NDArray *output); } } } diff --git a/libnd4j/include/ops/declarable/helpers/col2im.h b/libnd4j/include/ops/declarable/helpers/col2im.h index 793da4798..66d7a684a 100644 --- a/libnd4j/include/ops/declarable/helpers/col2im.h +++ b/libnd4j/include/ops/declarable/helpers/col2im.h @@ -27,7 +27,7 @@ namespace nd4j { namespace ops { namespace helpers { - void col2im(nd4j::LaunchContext & context, const NDArray& input, NDArray& output, const int sH, const int sW, const int pH, const int pW, const int iH, const int iW, const int dH, const int dW); + ND4J_EXPORT void col2im(nd4j::LaunchContext & context, const NDArray& input, NDArray& output, const int sH, const int sW, const int pH, const int pW, const int iH, const int iW, const int dH, const int dW); } diff --git a/libnd4j/include/ops/declarable/helpers/convolutions.h b/libnd4j/include/ops/declarable/helpers/convolutions.h index 65544960a..68b39cfd5 100644 --- a/libnd4j/include/ops/declarable/helpers/convolutions.h +++ b/libnd4j/include/ops/declarable/helpers/convolutions.h @@ -23,6 +23,7 @@ #include #include +#include #include @@ -35,7 +36,7 @@ namespace nd4j { PNORM_POOL = 2, }; - class ConvolutionUtils { + class ND4J_EXPORT ConvolutionUtils { public: static inline void calcOutSizePool2D(int& oH, int& oW, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int iH, const int iW, const int paddingMode) { diff --git a/libnd4j/include/ops/declarable/helpers/im2col.h b/libnd4j/include/ops/declarable/helpers/im2col.h index 04559e494..f484c9bc4 100644 --- a/libnd4j/include/ops/declarable/helpers/im2col.h +++ b/libnd4j/include/ops/declarable/helpers/im2col.h @@ -27,7 +27,7 @@ namespace nd4j { namespace ops { namespace helpers { - void im2col(nd4j::LaunchContext & context, const NDArray& im, NDArray& col, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const NDArray& arrZeroPadVal); + ND4J_EXPORT void im2col(nd4j::LaunchContext & context, const NDArray& im, NDArray& col, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const NDArray& arrZeroPadVal); } } } diff --git a/libnd4j/include/ops/declarable/helpers/lstmLayer.h b/libnd4j/include/ops/declarable/helpers/lstmLayer.h index 7d94c32e0..d0bc16b66 100644 --- a/libnd4j/include/ops/declarable/helpers/lstmLayer.h +++ b/libnd4j/include/ops/declarable/helpers/lstmLayer.h @@ -29,13 +29,13 @@ namespace ops { namespace helpers { ////////////////////////////////////////////////////////////////////////// -void lstmLayerCell(const NDArray* x, const NDArray* Wx, const NDArray* Wr, +void ND4J_EXPORT lstmLayerCell(const NDArray* x, const NDArray* Wx, const NDArray* Wr, const NDArray* b, const NDArray* hI, const NDArray* cI, const NDArray* Wp, const std::vector& params, NDArray* h, NDArray* c); ////////////////////////////////////////////////////////////////////////// -void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr, +void ND4J_EXPORT lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr, const NDArray* b, const NDArray* seqLen, const NDArray* hI, const NDArray* cI, const NDArray* Wp, const std::vector& params, const bool forward, diff --git a/libnd4j/include/ops/declarable/helpers/multiUnique.h b/libnd4j/include/ops/declarable/helpers/multiUnique.h index 587ce44f0..12fa6db10 100644 --- a/libnd4j/include/ops/declarable/helpers/multiUnique.h +++ b/libnd4j/include/ops/declarable/helpers/multiUnique.h @@ -26,7 +26,7 @@ namespace nd4j { namespace ops { namespace helpers { - bool multiUnique(std::vector const& inputList, nd4j::memory::Workspace* workspace = nullptr); + ND4J_EXPORT bool multiUnique(std::vector const& inputList, nd4j::memory::Workspace* workspace = nullptr); } } diff --git a/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp index 33a8fa10a..ffa19412a 100644 --- a/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp @@ -774,8 +774,27 @@ TEST_F(BroadcastableOpsTests, broadcast_bool_2) { ASSERT_TRUE(z.equalsTo(e)); } -TEST_F(BroadcastableOpsTests, broadcast_2) { +TEST_F(BroadcastableOpsTests, broadcast_bool_3) { + auto x = NDArrayFactory::create(0); + auto y = NDArrayFactory::create('c', {3}, {2, 1, 2}); + NDArray z('c', {3}, nd4j::DataType::BOOL); + NDArray e('c', {3}, nd4j::DataType::BOOL); + + e.assign(true); + + nd4j::ops::less op; + auto status = op.execute({&x, &y}, {&z}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, status); + + // z.printIndexedBuffer("Z"); + + ASSERT_TRUE(z.isSameShape(e)); + ASSERT_TRUE(z.equalsTo(e)); +} + +TEST_F(BroadcastableOpsTests, broadcast_2) { NDArray x('c', {3, 1, 2}, nd4j::DataType::FLOAT32); NDArray y('c', {2, 2}, nd4j::DataType::FLOAT32); NDArray z('c', {3, 2, 2}, nd4j::DataType::FLOAT32); @@ -797,3 +816,19 @@ TEST_F(BroadcastableOpsTests, broadcast_2) { ASSERT_TRUE(z.equalsTo(e)); } +TEST_F(BroadcastableOpsTests, broadcast_3) { + auto x = NDArrayFactory::create(0); + auto y = NDArrayFactory::create('c', {3}, {2, 1, 2}); + NDArray z('c', {3}, nd4j::DataType::INT32); + auto e = NDArrayFactory::create('c', {3}, {2, 1, 2}); + + nd4j::ops::add op; + auto status = op.execute({&x, &y}, {&z}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, status); + + // z.printIndexedBuffer("Z"); + + ASSERT_TRUE(z.isSameShape(e)); + ASSERT_TRUE(z.equalsTo(e)); +} diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 1d5a1df98..52fa0ca17 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -29,7 +29,7 @@ if (CUDA_BLAS) if(WIN32) message("CUDA on Windows: enabling /EHsc") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /FS /w") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /FS") SET_TARGET_PROPERTIES(${LIBND4J_NAME} PROPERTIES COMPILER_FLAGS "/EHsc") endif() diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu index c8b6fa1d9..593d47bb5 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu @@ -97,6 +97,8 @@ TEST_F(CudaBasicsTests1, TestPairwise_1) { cudaMemcpyAsync(devBufferPtrX, x.buffer(), x.lengthOf() * x.sizeOfT(), cudaMemcpyHostToDevice, *stream); cudaMemcpyAsync(devShapePtrX, x.shapeInfo(), shape::shapeInfoByteLength(x.shapeInfo()), cudaMemcpyHostToDevice, *stream); + res = cudaStreamSynchronize(*stream); + ASSERT_EQ(0, res); LaunchContext lc(stream, nullptr, nullptr); NativeOpExecutioner::execPairwiseTransform(&lc, pairwise::Add, nullptr, x.shapeInfo(), devBufferPtrX, reinterpret_cast(devShapePtrX), nullptr, x.shapeInfo(), devBufferPtrX, reinterpret_cast(devShapePtrX), nullptr, z.shapeInfo(), devBufferPtrZ, reinterpret_cast(devShapePtrX), nullptr); @@ -117,6 +119,7 @@ TEST_F(CudaBasicsTests1, TestPairwise_1) { z.tickWriteHost(); for (int e = 0; e < z.lengthOf(); e++) { + nd4j_printf("step %i\n", e); ASSERT_NEAR(exp.e(e), z.e(e), 1e-5); } } @@ -169,6 +172,8 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { void* reductionPointer = nullptr; cudaResult = cudaMalloc(reinterpret_cast(&reductionPointer), 1024*1024); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMemset(reductionPointer, 0, 1024 * 1024); + ASSERT_EQ(0, cudaResult); LaunchContext lc(&stream, LaunchContext::defaultContext()->getReductionPointer(), LaunchContext::defaultContext()->getScalarPointer(), LaunchContext::defaultContext()->getAllocationPointer()); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp index 2fbd42af7..67cd56d5e 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp @@ -1576,32 +1576,32 @@ TEST_F(DeclarableOpsTests6, LogDet_3) { //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests6, MatrixInverse_1) { - auto x = NDArrayFactory::create('c', {2, 5, 5}, { - 2., 4., 60., 8., 10., - 0., 1., 2., 3., 4., - 0., 0., 2., 4., 6., - 0., 0., 0., 1., 2., - 0., 0., 0., 0., 4., + auto x = NDArrayFactory::create('c', {2, 5, 5}, { + 2.f, 4.f, 60.f, 8.f, 10.f, + 0.f, 1.f, 2.f, 3.f, 4.f, + 0.f, 0.f, 2.f, 4.f, 6.f, + 0.f, 0.f, 0.f, 1.f, 2.f, + 0.f, 0.f, 0.f, 0.f, 4.f, - 1., 0., 0., 0., 0., - 2., 1., 0., 0., 0., - 30., 2., 1., 0., 0., - 4., 3., 2., 1., 0., - 5., 4., 3., 2., 1., + 1.f, 0.f, 0.f, 0.f, 0.f, + 2.f, 1.f, 0.f, 0.f, 0.f, + 30.f, 2.f, 1.f, 0.f, 0.f, + 4.f, 3.f, 2.f, 1.f, 0.f, + 5.f, 4.f, 3.f, 2.f, 1.f }); - auto exp = NDArrayFactory::create('c', {2, 5, 5}, { - 0.5, -2.0, -13.0, 54.0, -6.75, - 0.0, 1.0, -1.0, 1.0, 0.0, - 0, 0, 0.5, -2.0, 0.25, - 0, 0, 0, 1.0, -0.5, - 0, 0, 0, 0, 0.25, + auto exp = NDArrayFactory::create('c', {2, 5, 5}, { + 0.5f, -2.0f, -13.0f, 54.0f, -6.75f, + 0.0f, 1.0f, -1.0f, 1.0f, 0.0f, + 0.f, 0.f, 0.5f, -2.0f, 0.25f, + 0.f, 0.f, 0.f, 1.0f, -0.5f, + 0.f, 0.f, 0.f, 0.f, 0.25f, - 1.0, 0.0, 0.0, 0.0, 0., - -2.0, 1.0, 0., 0., 0., - -26.0, -2.0, 1, 0, 0., - 54.0, 1.0, -2.0, 1, 0., - -27.0, 0.0, 1.0, -2.0, 1. + 1.0f, 0.0f, 0.0f, 0.0f, 0.f, + -2.0f, 1.0f, 0.f, 0.f, 0.f, + -26.0f, -2.0f, 1.f, 0.f, 0.f, + 54.0f, 1.0f, -2.0f, 1.f, 0.f, + -27.0f, 0.0f, 1.0f, -2.0f, 1.f, }); nd4j::ops::matrix_inverse op; @@ -1620,8 +1620,8 @@ TEST_F(DeclarableOpsTests6, MatrixInverse_1) { //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests6, MatrixInverse_010) { - auto x = NDArrayFactory::create('c', {1, 5, 5}, {1., 0., 0., 0., 0.,2., 1., 0., 0., 0.,30., 2., 1., 0., 0.,4., 3., 2., 1., 0.,5., 4., 3., 2., 1.,}); - auto exp = NDArrayFactory::create('c', {1, 5, 5}, {1.0, 0.0, 0.0, 0.0, 0.,-2.0, 1.0, 0., 0., 0.,-26.0, -2.0, 1, 0, 0.,54.0, 1.0, -2.0, 1, 0.,-27.0, 0.0, 1.0, -2.0, 1.}); + auto x = NDArrayFactory::create('c', {1, 5, 5}, {1.f, 0.f, 0.f, 0.f, 0.f, 2.f, 1.f, 0.f, 0.f, 0.f, 30.f, 2.f, 1.f, 0.f, 0.f, 4.f, 3.f, 2.f, 1.f, 0.f, 5.f, 4.f, 3.f, 2.f, 1.f, }); + auto exp = NDArrayFactory::create('c', {1, 5, 5}, {1.0f, 0.0f, 0.0f, 0.0f, 0.f, -2.0f, 1.0f, 0.f, 0.f, 0.f, -26.0f, -2.0f, 1.f, 0.f, 0.f, 54.0f, 1.0f, -2.0f, 1.f, 0.f, -27.0f, 0.0f, 1.0f, -2.0f, 1.f}); nd4j::ops::matrix_inverse op; auto result = op.execute({&x}, {}, {}, {}, false, nd4j::DataType::FLOAT32); @@ -1639,9 +1639,9 @@ TEST_F(DeclarableOpsTests6, MatrixInverse_010) { //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests6, MatrixInverse_01) { - auto x = NDArrayFactory::create('c', {1, 5, 5}, {2., 4., 60., 8., 10., 0., 1., 2., 3., 4., 0., 0., 2., 4., 6., 0., 0., 0., 1., 2., 0., 0., 0., 0., 4. }); + auto x = NDArrayFactory::create('c', {1, 5, 5}, {2.f, 4.f, 60.f, 8.f, 10.f, 0.f, 1.f, 2.f, 3.f, 4.f, 0.f, 0.f, 2.f, 4.f, 6.f, 0.f, 0.f, 0.f, 1.f, 2.f, 0.f, 0.f, 0.f, 0.f, 4.f }); - auto exp = NDArrayFactory::create('c', {1, 5, 5}, {0.5, -2.0, -13.0, 54.0, -6.75, 0.0, 1.0, -1.0, 1.0, 0.0, 0, 0, 0.5, -2.0, 0.25, 0, 0, 0, 1.0, -0.5, 0, 0, 0, 0, 0.25 }); + auto exp = NDArrayFactory::create('c', {1, 5, 5}, {0.5f, -2.0f, -13.0f, 54.0f, -6.75f, 0.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.f, 0.f, 0.5f, -2.0f, 0.25f, 0.f, 0.f, 0.f, 1.0f, -0.5f, 0.f, 0.f, 0.f, 0.f, 0.25f }); nd4j::ops::matrix_inverse op; auto result = op.execute({&x}, {}, {}, {}, false, nd4j::DataType::FLOAT32); @@ -1658,8 +1658,8 @@ TEST_F(DeclarableOpsTests6, MatrixInverse_01) { //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests6, MatrixInverse_02) { - auto x = NDArrayFactory::create('c', {1, 5, 5}, {1., 0., 0., 0., 0., 2., 1., 0., 0., 0., 30., 2., 1., 0., 0., 4., 3., 2., 1., 0., 5., 4., 3., 2., 1. }); - auto exp = NDArrayFactory::create('c', {1, 5, 5}, {1.0, 0.0, 0.0, 0.0, 0., -2.0, 1.0, 0., 0., 0., -26.0, -2.0, 1, 0, 0., 54.0, 1.0, -2.0, 1, 0., -27.0, 0.0, 1.0, -2.0, 1. }); + auto x = NDArrayFactory::create('c', {1, 5, 5}, {1.f, 0.f, 0.f, 0.f, 0.f, 2.f, 1.f, 0.f, 0.f, 0.f, 30.f, 2.f, 1.f, 0.f, 0.f, 4.f, 3.f, 2.f, 1.f, 0.f, 5.f, 4.f, 3.f, 2.f, 1.f }); + auto exp = NDArrayFactory::create('c', {1, 5, 5}, {1.0f, 0.0f, 0.0f, 0.0f, 0.f, -2.0f, 1.0f, 0.f, 0.f, 0.f, -26.0f, -2.0f, 1.f, 0.f, 0.f, 54.0f, 1.0f, -2.0f, 1.f, 0.f, -27.0f, 0.0f, 1.0f, -2.0f, 1.f }); nd4j::ops::matrix_inverse op; auto result = op.execute({&x}, {}, {}, {}, false, nd4j::DataType::FLOAT32); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/broadcast/BasicBroadcastTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/broadcast/BasicBroadcastTests.java index d9057c95a..7b72f4bae 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/broadcast/BasicBroadcastTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/broadcast/BasicBroadcastTests.java @@ -24,6 +24,7 @@ import org.junit.runners.Parameterized; import org.nd4j.linalg.BaseNd4jTest; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.transforms.custom.LessThan; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.AddOp; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.RealDivOp; import org.nd4j.linalg.factory.Nd4j; @@ -288,6 +289,30 @@ public class BasicBroadcastTests extends BaseNd4jTest { } } + @Test + public void testLt(){ + INDArray x = Nd4j.scalar(0); + INDArray y = Nd4j.createFromArray(2,1,2); + + INDArray result = Nd4j.create(DataType.BOOL, 3); + INDArray lt = Nd4j.exec(new LessThan(x,y,result))[0]; + + INDArray exp = Nd4j.createFromArray(true, true, true); + assertEquals(exp, lt); + } + + @Test + public void testAdd(){ + INDArray x = Nd4j.scalar(0); + INDArray y = Nd4j.createFromArray(2,1,2); + + INDArray result = Nd4j.create(DataType.INT, 3); + INDArray sum = Nd4j.exec(new AddOp(x,y,result))[0]; + + INDArray exp = Nd4j.createFromArray(2, 1, 2); + assertEquals(exp, sum); + } + @Override public char ordering() { return 'c';