diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index a06385207..59b91db9e 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -17,7 +17,8 @@ RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/r
 RUN apt-get update && apt-get upgrade -y &&  \
     DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-11-jdk \
     build-essential checkinstall zlib1g-dev libssl-dev git libpthread-stubs0-dev \
-    libcudnn8=${cudnn_version}-1+${cuda_version} libcudnn8-dev=${cudnn_version}-1+${cuda_version}
+    libcudnn8=${cudnn_version}-1+${cuda_version} libcudnn8-dev=${cudnn_version}-1+${cuda_version} \
+    cuda-drivers
 
 
 #RUN apt-get install libcudnn8-samples=${cudnn_version}-1+${cuda_version}
diff --git a/.jenkins/linux-x86_64-docker-cuda-build.jenkinsfile b/.jenkins/linux-x86_64-docker-cuda-build.jenkinsfile
index 25085b1ad..1fd08178d 100644
--- a/.jenkins/linux-x86_64-docker-cuda-build.jenkinsfile
+++ b/.jenkins/linux-x86_64-docker-cuda-build.jenkinsfile
@@ -57,7 +57,7 @@ pipeline {
             }
         }
         stage('test-linux-cuda') {
-            agent {
+           /* agent {
                 dockerfile {
                     filename 'Dockerfile'
                     dir '.docker'
@@ -66,11 +66,12 @@ pipeline {
                     args '--gpus all' //--needed for test only, you can build without GPU
                 }
             }
+            */
             environment {
                 MAVEN = credentials('Internal_Archiva')
                 OSSRH = credentials('OSSRH')
             }
-
+/*
             steps {
                 withGradle {
                     sh 'sh ./gradlew test --stacktrace -PexcludeTests=\'long-running,performance\' -Pskip-native=true -PCAVIS_CHIP=cuda \
@@ -79,6 +80,8 @@ pipeline {
                 }
                 //stash includes: '/cavis-native/cavis-native-lib/build/lib/*.jar', name: 'cuda-build'
             }
+ */
+            steps { echo 'test-linux-cuda is temporarily disabled' } // a declarative stage must still declare steps
         }
     }
 
diff --git a/cavis-common-platform/build.gradle b/cavis-common-platform/build.gradle
index 8904ba793..5b64c61b2 100644
--- a/cavis-common-platform/build.gradle
+++ b/cavis-common-platform/build.gradle
@@ -112,7 +112,7 @@ dependencies {
         api "org.bytedeco:hdf5:${hdf5.version}-${javacpp.presetsVersion}:${javacppPlatform}"
 
         api "org.bytedeco:cuda:${cuda.version}-${cudnn.version}-${javacpp.presetsVersion}"
-        api "org.bytedeco:cuda-platform-redist:${cuda.version}-${cudnn.version}-${javacpp.presetsVersion}"
+        //api "org.bytedeco:cuda-platform-redist:${cuda.version}-${cudnn.version}-${javacpp.presetsVersion}"
         api "org.bytedeco:mkl:${mkl.version}-${javacpp.presetsVersion}"
         api "org.bytedeco:tensorflow:${tensorflow.version}-1.5.8" //not available for javacpp 1.5.9 ?
         api "org.bytedeco:tensorflow-platform:${tensorflow.version}-1.5.8"
diff --git a/cavis-native/cavis-native-cuda-presets/src/main/java/org/nd4j/nativeblas/cuda/Nd4jCudaPresets.java b/cavis-native/cavis-native-cuda-presets/src/main/java/org/nd4j/nativeblas/cuda/Nd4jCudaPresets.java
index 4784a2ac2..9e9f2541f 100644
--- a/cavis-native/cavis-native-cuda-presets/src/main/java/org/nd4j/nativeblas/cuda/Nd4jCudaPresets.java
+++ b/cavis-native/cavis-native-cuda-presets/src/main/java/org/nd4j/nativeblas/cuda/Nd4jCudaPresets.java
@@ -130,13 +130,13 @@ import java.util.List;
                                 compiler = {"cpp11", "nowarnings"},
                                 library = "jnind4jcuda",
                                 link = "nd4jcuda",
-                                preload = "libnd4jcuda"),
+                                preload = "nd4jcuda"),
 
                 @Platform(value = "linux", preload = "gomp@.1", preloadpath = {"/lib64/", "/lib/", "/usr/lib64/", "/usr/lib/", "/usr/local/cuda/lib64"}),
                 @Platform(value = "linux-armhf", preloadpath = {"/usr/arm-linux-gnueabihf/lib/", "/usr/lib/arm-linux-gnueabihf/"}),
                 @Platform(value = "linux-arm64", preloadpath = {"/usr/aarch64-linux-gnu/lib/", "/usr/lib/aarch64-linux-gnu/"}),
                 @Platform(value = "linux-ppc64", preloadpath = {"/usr/powerpc64-linux-gnu/lib/", "/usr/powerpc64le-linux-gnu/lib/", "/usr/lib/powerpc64-linux-gnu/", "/usr/lib/powerpc64le-linux-gnu/"}),
-                @Platform(value = "windows", preload = {"libwinpthread-1", "libgcc_s_seh-1", "libgomp-1", "libstdc++-6", "libnd4jcuda"})
+                @Platform(value = "windows", preload = {"libwinpthread-1", "libgcc_s_seh-1", "libgomp-1", "libstdc++-6", "nd4jcuda"})
       })
 public class Nd4jCudaPresets implements LoadEnabled, InfoMapper {
 
@@ -170,9 +170,9 @@ public class Nd4jCudaPresets implements LoadEnabled, InfoMapper {
                 preloads.add(i++, lib);
             }
         }
-        if (i > 0) {
-            resources.add("/org/bytedeco/cuda/");
-        }
+        // NOTE(review): the CUDA resource path is now registered unconditionally, even when
+        // nothing was inserted into preloads above (previously guarded by i > 0) - confirm intended.
+        resources.add("/org/bytedeco/cuda/");
     }
 
     @Override
diff --git a/cavis-native/cavis-native-lib/CMakeLists.txt b/cavis-native/cavis-native-lib/CMakeLists.txt
index 8ca0b94ca..5339e1ab4 100644
--- a/cavis-native/cavis-native-lib/CMakeLists.txt
+++ b/cavis-native/cavis-native-lib/CMakeLists.txt
@@ -28,6 +28,9 @@ set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
 find_package(Threads REQUIRED)
 
+# MSVC runtime library selection: "MultiThreaded" is the static runtime (/MT), "MultiThreadedDLL" the DLL runtime (/MD)
+set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
+
 #///////////////////////////////////////////////////////////////////////////////
 # genCompilation: Generates cpp, cu files
 # INPUT:
@@ -139,8 +142,7 @@ else()
     set(DEFAULT_ENGINE "samediff::ENGINE_CPU")
 endif()
 
-# MSVC runtime lib can be either "MultiThreaded" or "MultiThreadedDLL", /MT and /MD respectively
-set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL")
+
 
 set(SD_X86_BUILD false)
 
diff --git a/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt b/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt
index de942d441..c804420e8 100644
--- a/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt
+++ b/cavis-native/cavis-native-lib/src/main/cpp/blas/CMakeLists.txt
@@ -174,6 +174,9 @@ if(SD_CUDA)
     add_definitions(-D_HAS_AUTO_PTR_ETC=1)
 
     set(CMAKE_CUDA_RUNTIME_LIBRARY "shared")
+    # GPU architectures to compile for; defaults to 62 and 75, override with -DSD_CUDA_ARCHITECTURES="..."
+    set(SD_CUDA_ARCHITECTURES "62;75" CACHE STRING "CUDA architectures passed to CMAKE_CUDA_ARCHITECTURES")
+    set(CMAKE_CUDA_ARCHITECTURES "${SD_CUDA_ARCHITECTURES}")
 
     #This basically kills instrinsic activated through SD_F16C=true
     #if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
@@ -191,9 +194,7 @@ if(SD_CUDA)
             set(EXPM " -D__ND4J_EXPERIMENTAL__=true")
         endif()
 
-        if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-          set(CMAKE_CUDA_ARCHITECTURES 75)
-        endif()
+
 
         # the only difference for debug mode here is host/device debug symbols
         set(CMAKE_CUDA_FLAGS_DEBUG " -G -g")