diff --git a/cavis-native/cavis-native-lib/build.gradle b/cavis-native/cavis-native-lib/build.gradle index dcbabf612..759d86205 100644 --- a/cavis-native/cavis-native-lib/build.gradle +++ b/cavis-native/cavis-native-lib/build.gradle @@ -251,8 +251,7 @@ chipList.each { String thisChip -> '--chip-extension', avxExtension, '-j', "${host_cores}", // '--helper', 'mkldnn', - '--helper', 'cudnn', - '--tests'] + '--helper', 'cudnn'] } else if (thisChip.equals('cuda') && osdetector.os.startsWith("linux")) { //cuDNN requires CUDA it.buildCommand = ['bash', 'buildnativeoperations.sh', '-V', @@ -273,6 +272,7 @@ chipList.each { String thisChip -> '-j', "${host_cores}", '--helper', 'mkldnn'] } + if(project.hasProperty("nativeTests")) it.buildCommand += "--tests" } //Run the parser on the InfoMap in Nd4j$ChipPresets and listed header files in @Platform diff --git a/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt b/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt index e37b2a5ef..d33caee06 100644 --- a/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt +++ b/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt @@ -161,13 +161,8 @@ if(HAVE_ARMCOMPUTE) file(GLOB_RECURSE CUSTOMOPS_ARMCOMPUTE_SOURCES false ops/declarable/platform/armcompute/*.cpp ops/declarable/platform/armcompute/*.h) endif() -if(SD_CUDA) - message("Build cublas") - if(NOT DEFINED ${CMAKE_CUDA_ARCHITECTURES}) - set(CMAKE_CUDA_ARCHITECTURES 75) - endif() - message(STATUS "CUDA architectures set to ${CMAKE_CUDA_ARCHITECTURES}") +if(SD_CUDA) find_package(CUDAToolkit) enable_language(CUDA) @@ -178,6 +173,8 @@ if(SD_CUDA) #Enable features prio C++17 add_definitions(-D_HAS_AUTO_PTR_ETC=1) + set(CMAKE_CUDA_RUNTIME_LIBRARY "shared") + #This basically kills instrinsic activated through SD_F16C=true #if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") # set (CMAKE_CXX_FLAGS "") @@ -194,7 +191,9 @@ if(SD_CUDA) set(EXPM " -D__ND4J_EXPERIMENTAL__=true") endif() - + if(NOT DEFINED 
CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 75) + endif() # the only difference for debug mode here is host/device debug symbols set(CMAKE_CUDA_FLAGS_DEBUG " -G -g") @@ -217,7 +216,6 @@ if(SD_CUDA) endif() #set(CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -DCUDA_VERSION_MAJOR=${CUDA_VERSION_MAJOR} ${EXPM} -w --cudart=static --expt-extended-lambda -Xfatbin -compress-all ") - set(CMAKE_CUDA_ARCHITECTURES "all-major") set(CMAKE_CUDA_RUNTIME_LIBRARY "shared") #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code \"arch=compute_53,code=[compute_53,sm_53]\" " ) #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code \"arch=compute_61,code=[compute_61,sm_61]\" " ) @@ -240,6 +238,7 @@ if(SD_CUDA) message("CUDA_PROPAGATE_HOST_FLAGS = ${CUDA_PROPAGATE_HOST_FLAGS}") message("CUDA_ARCH_FLAGS = ${CUDA_ARCH_FLAGS}") message("CUDAHOSTCXX = ${CUDAHOSTCXX}") + message("CMAKE_CUDA_ARCHITECTURES = ${CMAKE_CUDA_ARCHITECTURES}") file(GLOB_RECURSE PERF_SOURCES false performance/*.cpp performance/*.h) file(GLOB_RECURSE EXCEPTIONS_SOURCES false exceptions/*.cpp exceptions/*.h)