diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index d8b0439b4..6ee102cb3 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.6) +cmake_minimum_required(VERSION 3.15) project(libnd4j) set(CMAKE_VERBOSE_MAKEFILE OFF) option(NATIVE "Optimize for build machine (might not work on others)" OFF) @@ -7,6 +7,17 @@ set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS OFF) option(BUILD_TESTS "Build tests" OFF) +option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" OFF) +set(FLATBUFFERS_BUILD_FLATC "OFF" CACHE STRING "Hack to disable flatc build" FORCE) + +set(CMAKE_CXX_STANDARD 11) +if (CUDA_BLAS) + enable_language(CUDA) + set(CMAKE_CUDA_STANDARD 11) +endif() + +# MSVC runtime lib can be either "MultiThreaded" or "MultiThreadedDLL", /MT and /MD respectively +set(MSVC_RT_LIB "MultiThreadedDLL") set(X86_BUILD false) @@ -17,23 +28,23 @@ endif() # -fsanitize=address # -fsanitize=leak if (ANDROID_BUILD) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -fPIC -std=c++11 -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -fPIC -std=c++11 -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -fPIC -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -fPIC -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else") elseif (APPLE) - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else 
-D__APPLE_OS__=true -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG " -O0 -g -fPIC -std=c++11 -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else -D__APPLE_OS__=true") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else -D__APPLE_OS__=true -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " -O0 -g -fPIC -Wno-braced-scalar-init -Wno-delete-non-virtual-dtor -Wno-unused-command-line-argument -Wno-dangling-else -D__APPLE_OS__=true") elseif(WIN32) set(X86_BUILD true) if (CUDA_BLAS) set(CMAKE_CXX_FLAGS_RELEASE "-D_RELEASE=true") set(CMAKE_CXX_FLAGS_DEBUG " /FS /EHsc") else() - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -fmax-errors=2 -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG " -g -O2 -fPIC -std=c++11 -fmax-errors=2") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -fmax-errors=2 -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " -g -O2 -fPIC -fmax-errors=2") endif() else() - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -fmax-errors=2 -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG " -g -O0 -fPIC -std=c++11 -fmax-errors=2") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -fmax-errors=2 -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " -g -O0 -fPIC -fmax-errors=2") if (CPU_BLAS) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") diff --git a/libnd4j/CMakeLists.txt.in b/libnd4j/CMakeLists.txt.in index 33946a014..8e8741c86 100644 --- a/libnd4j/CMakeLists.txt.in +++ b/libnd4j/CMakeLists.txt.in @@ -9,6 +9,7 @@ ExternalProject_Add(flatbuffers SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-src" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-build" CONFIGURE_COMMAND "" + CMAKE_ARGS "-DFLATBUFFERS_BUILD_FLATC=OFF" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index c86bdc13a..6d7944f14 100755 --- a/libnd4j/blas/CMakeLists.txt +++ 
b/libnd4j/blas/CMakeLists.txt @@ -136,116 +136,61 @@ if(CUDA_BLAS) add_definitions(-D__CUDABLAS__=true) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") set (CMAKE_CXX_FLAGS "") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER 4.9 AND "$ENV{TRICK_NVCC}" STREQUAL "YES" AND CUDA_VERSION VERSION_LESS "8.0") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__GNUC__=4 -D__GNUC_MINOR__=9 -D_FORCE_INLINES -D_MWAITXINTRIN_H_INCLUDED") - set (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -std=c++11 -Dnullptr=NULL") - message("TRICKING CUDA INTO SUPPORTING GCC > 4.9 YOU ARE PROCEEDING AT YOUR OWN RISK") - endif() - endif() - - # we want OpenMP to be available for hybrid operations, at least for GCC - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - find_package(OpenMP) - if (OPENMP_FOUND) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - endif() endif() if (CUDA_FOUND) message("CUDA include directory: ${CUDA_INCLUDE_DIRS}") include_directories(${CUDA_INCLUDE_DIRS}) message("CUDA found!") - set( CUDA_ARCHITECTURE_MINIMUM "3.0" CACHE STRING "Minimum required CUDA compute capability" ) - SET(CUDA_VERBOSE_BUILD OFF) - SET(CUDA_SEPARABLE_COMPILATION OFF) - #set(CUDA_COMPUTE_CAPABILITY "61") - set(CUDA_COMPUTE_CAPABILITY "35") - # make NVCC more verbose to prevent timeouts on CI servers - #list(APPEND CUDA_NVCC_FLAGS -v) + if ("${EXPERIMENTAL}" STREQUAL "yes") message("Experimental mode ENABLED") - list(APPEND CUDA_NVCC_FLAGS -D__ND4J_EXPERIMENTAL__=true) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__ND4J_EXPERIMENTAL__=true") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__ND4J_EXPERIMENTAL__=true") - set (EXPM " -D__ND4J_EXPERIMENTAL__=true") + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D__ND4J_EXPERIMENTAL__=true") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__ND4J_EXPERIMENTAL__=true") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__ND4J_EXPERIMENTAL__=true") + 
set(EXPM " -D__ND4J_EXPERIMENTAL__=true") endif() - if (CMAKE_BUILD_TYPE STREQUAL "Release") - if(CUDA_VERSION VERSION_GREATER "9.2") # cuda 10 - if ("${COMPUTE}" STREQUAL "all") - if (APPLE) - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60) - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70) - endif() + + # the only difference for debug mode here is host/device debug symbols + set(CMAKE_CUDA_FLAGS_DEBUG " -G -g") + + # we need -fPIC on Linux/GCC + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + message("Enabling fPIC...") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-fPIC") + endif() + + if(CUDA_VERSION VERSION_GREATER "9.2") # cuda 10 + if ("${COMPUTE}" STREQUAL "all") + if (APPLE) + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_10 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60") else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() - elseif(CUDA_VERSION VERSION_GREATER "8.0") # cuda 9 - if ("${COMPUTE}" STREQUAL "all") - if (APPLE) - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60) - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -Xfatbin -compress-all -gencode 
arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70) - endif() - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() - elseif (CUDA_VERSION VERSION_GREATER "7.5") # cuda 8.0 - if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60) - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_10 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70") endif() else() - if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} --cudart=static --expt-extended-lambda -O3 -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_52,code=sm_52 ) - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} --cudart=static --expt-extended-lambda -O3 -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_10 ${EXPM} -w --cudart=static --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}") + endif() + elseif(CUDA_VERSION VERSION_GREATER "8.0") # cuda 9 + if ("${COMPUTE}" STREQUAL "all") + if (APPLE) + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_9 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode 
arch=compute_60,code=sm_60") + else() + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_9 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70") + endif() + else() + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_9 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}") + endif() + elseif (CUDA_VERSION VERSION_GREATER "7.5") # cuda 8.0 + if ("${COMPUTE}" STREQUAL "all") + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_8 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60") + else() + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_8 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}") endif() - else() - # debug only - if (LINUX) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--export-dynamic -rdynamic") - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --export-dynamic") - endif() - - if(CUDA_VERSION VERSION_GREATER "9.2") # cuda 9 - message("CUDA 10 Debug build") - if ("${COMPUTE}" STREQUAL "all") - if (APPLE) - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62) - elseif() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode 
arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70) - endif() - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() - elseif(CUDA_VERSION VERSION_GREATER "8.0") # cuda 9 - if ("${COMPUTE}" STREQUAL "all") - if (APPLE) - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62) - elseif() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70) - endif() - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() - elseif (CUDA_VERSION VERSION_GREATER "7.5") # cuda 8 - if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode 
arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62) - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() + if ("${COMPUTE}" STREQUAL "all") + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_75 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_52,code=sm_52") else() - if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53) - else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) - endif() + set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_75 ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all -arch=compute_${COMPUTE} -code=sm_${COMPUTE}") endif() endif() @@ -264,30 +209,37 @@ if(CUDA_BLAS) file(GLOB_RECURSE LOOPS_SOURCES false ../include/loops/impl/*.cpp ../include/loops/*.h) file(GLOB_RECURSE LOOPS_SOURCES_CUDA false ../include/loops/*.cu) - - CUDA_ADD_LIBRARY(${LIBND4J_NAME} SHARED cuda/NativeOps.cu cuda/NativeOpExecutioner.cu cuda/BlasVersionHelper.cu Environment.cpp ${LOOPS_SOURCES_CUDA} + add_library(nd4jobj OBJECT cuda/NativeOps.cu cuda/NativeOpExecutioner.cu cuda/BlasVersionHelper.cu Environment.cpp ${LOOPS_SOURCES_CUDA} ${CUSTOMOPS_HELPERS_SOURCES} ${HELPERS_SOURCES} ${EXEC_SOURCES} 
../include/cnpy/cnpy.cpp ../include/nd4jmemset.h ../include/nd4jmalloc.h cpu/GraphExecutioner.cpp cuda/NDArray.cu cpu/NDArrayFactory.cpp Environment.h ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES} ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES}) + add_library(${LIBND4J_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>) + + message("MSVC runtime for library: ${MSVC_RT_LIB}") + + # static library is built only if we're going to build tests, skip otherwise + if (BUILD_TESTS) + add_library(${LIBND4J_NAME}static STATIC $<TARGET_OBJECTS:nd4jobj>) + set_property(TARGET ${LIBND4J_NAME}static PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>") + install(TARGETS ${LIBND4J_NAME}static DESTINATION .) + endif() + + # on windows we want to make sure we use MT or MD, but since we use it in one lib, we must use it everywhere to avoid conflicts + set_property(TARGET nd4jobj PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>") + set_property(TARGET ${LIBND4J_NAME} PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>") + if(WIN32) message("CUDA on Windows: enabling /EHsc") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /bigobj /std:c++14") - SET_TARGET_PROPERTIES(${LIBND4J_NAME} PROPERTIES COMPILER_FLAGS "/EHsc /bigobj /std:c++14") endif() target_link_libraries(${LIBND4J_NAME} ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusolver_LIBRARY}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/cuda) install(TARGETS ${LIBND4J_NAME} DESTINATION .) 
- - add_custom_command( - TARGET ${LIBND4J_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - $<TARGET_FILE:${LIBND4J_NAME}> - ${PROJECT_BINARY_DIR}/../../tests_cpu/) endif(CUDA_FOUND) elseif(CPU_BLAS) @@ -334,16 +286,14 @@ elseif(CPU_BLAS) if(IOS) add_library(${LIBND4J_NAME} STATIC $<TARGET_OBJECTS:nd4jobj>) else() - add_library(${LIBND4J_NAME}static STATIC $<TARGET_OBJECTS:nd4jobj>) + # static library is built only if we're going to build tests, skip otherwise + if (BUILD_TESTS) + add_library(${LIBND4J_NAME}static STATIC $<TARGET_OBJECTS:nd4jobj>) + endif() + add_library(${LIBND4J_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>) endif() - #if(WIN32) - # message("CPU on Windows: enabling /EHsc") - # SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /bigobj /std:c++14") - # SET_TARGET_PROPERTIES(${LIBND4J_NAME} PROPERTIES COMPILER_FLAGS "/EHsc /bigobj /std:c++14") - #endif() - # we're including {MKLDNN} here in case of building from sources. in future that'll replace {MKLDNN_LIBRARIES}. same applies to BLAS if (NOT BLAS_LIBRARIES) set(BLAS_LIBRARIES "") @@ -374,7 +324,6 @@ elseif(CPU_BLAS) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() - #install(TARGETS mySharedLib DESTINATION /some/full/path) install(TARGETS ${LIBND4J_NAME} DESTINATION .) 
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/cpu) endif() diff --git a/libnd4j/blas/NDArray.h b/libnd4j/blas/NDArray.h index d89ef8c72..daa9d9328 100644 --- a/libnd4j/blas/NDArray.h +++ b/libnd4j/blas/NDArray.h @@ -2103,7 +2103,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { template <typename T> T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { - if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2), w >= sizeAt(3)) + if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4 !"); if (DataTypeUtils::fromT<T>() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); diff --git a/libnd4j/include/op_boilerplate.h b/libnd4j/include/op_boilerplate.h index 4a6561f3b..52685a2c9 100644 --- a/libnd4j/include/op_boilerplate.h +++ b/libnd4j/include/op_boilerplate.h @@ -1242,7 +1242,9 @@ #if defined(_MSC_VER) || defined(_WIN64) || defined(_WIN32) || defined(__CLION_IDE__) || defined(__VSCODE__) #define NOT_EXCLUDED(NAME) 1>0 #else -#define NOT_EXCLUDED(NAME) defined(LIBND4J_ALL_OPS) || defined(NAME) +// for now we don't want minifier mechanics working +//#define NOT_EXCLUDED(NAME) defined(LIBND4J_ALL_OPS) || defined(NAME) +#define NOT_EXCLUDED(NAME) 1>0 #endif #ifdef __JAVACPP_HACK__ diff --git a/libnd4j/tests_cpu/CMakeLists.txt b/libnd4j/tests_cpu/CMakeLists.txt index 3d58617b1..5de17a2d1 100644 --- a/libnd4j/tests_cpu/CMakeLists.txt +++ b/libnd4j/tests_cpu/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.6) +cmake_minimum_required(VERSION 3.15) project(tests_cpu) # Download and unpack googletest at configure time diff --git a/libnd4j/tests_cpu/CMakeLists.txt.in b/libnd4j/tests_cpu/CMakeLists.txt.in index 8bc138871..a3cba4d27 100644 --- a/libnd4j/tests_cpu/CMakeLists.txt.in +++ 
b/libnd4j/tests_cpu/CMakeLists.txt.in @@ -5,9 +5,10 @@ project(googletest-download NONE) include(ExternalProject) ExternalProject_Add(googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.8.1 + GIT_TAG release-1.10.0 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-src" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-build" + CMAKE_ARGS "" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 52fa0ca17..9ee58797e 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -30,31 +30,30 @@ if (CUDA_BLAS) if(WIN32) message("CUDA on Windows: enabling /EHsc") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /FS") - SET_TARGET_PROPERTIES(${LIBND4J_NAME} PROPERTIES COMPILER_FLAGS "/EHsc") endif() if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70) + set(CMAKE_CUDA_FLAGS " -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_30,code=sm_30 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70") else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) + set(CMAKE_CUDA_FLAGS " -DCUDA_10 ${EXPM} -w -G -g --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}") endif() endif() # -fsanitize=address # -fsanitize=leak if (APPLE) - set(CMAKE_CXX_FLAGS " -fPIC -std=c++11 -fmax-errors=2 -D__APPLE_OS__=true") + set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2 -D__APPLE_OS__=true") elseif(WIN32) if (CPU_BLAS) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -march=native 
-mtune=native -O3") endif() if (CPU_BLAS AND LINUX) - set(CMAKE_CXX_FLAGS " -fPIC -std=c++11 -fmax-errors=2") + set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2") endif() else() set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") - set(CMAKE_CXX_FLAGS " -fPIC -std=c++11 -fmax-errors=2") + set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2") if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*") set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native") else() @@ -68,14 +67,6 @@ else() endif() endif() -# TODO: get rid of this once problem confirmed solved -#if (APPLE) -# if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") -# if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER 6.0 OR "${CMAKE_C_COMPILER_VERSION}" VERSION_EQUAL 6.0) -# SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mavx512f -fmax-errors=1") -# endif() -# endif() -#endif() # tests are always compiled with all ops included SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLIBND4J_ALL_OPS=true -DBUILD_TESTS=true") @@ -141,6 +132,15 @@ if (CPU_BLAS) add_executable(runtests ${TEST_SOURCES}) target_link_libraries(runtests ${LIBND4J_NAME}static ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES} ${MKLDNN} ${BLAS_LIBRARIES} ${CPU_FEATURES} gtest gtest_main) elseif(CUDA_BLAS) - CUDA_ADD_EXECUTABLE(runtests ${TEST_SOURCES}) - target_link_libraries(runtests ${LIBND4J_NAME} ${CUDA_LIBRARIES} gtest gtest_main) + + add_executable(runtests ${TEST_SOURCES}) + + message("MSVC runtime for tests: ${MSVC_RT_LIB}") + + # applies to windows only + set_property(TARGET runtests PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>") + set_property(TARGET gtest PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>") + set_property(TARGET gtest_main PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>") + + target_link_libraries(runtests ${LIBND4J_NAME}static ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusolver_LIBRARY} gtest gtest_main) endif() \ No newline at end of file