diff --git a/.github/workflows/run-gpu-integration-tests-self-hosted.yml b/.github/workflows/run-gpu-integration-tests-self-hosted.yml index caeb13de3..95b38130d 100644 --- a/.github/workflows/run-gpu-integration-tests-self-hosted.yml +++ b/.github/workflows/run-gpu-integration-tests-self-hosted.yml @@ -38,19 +38,21 @@ jobs: mkdir -p ${GITHUB_WORKSPACE}/cache export CUDA_VISIBLE_DEVICES=0 echo "Running tests for cuda 11.0" - export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH" + export PATH="/opt/protobuf/bin:/usr/local/cuda-11/bin:$PATH" mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.0,:deeplearning4j-cuda-11.0,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests + echo "Running small tests for cuda 11.0" mvn -Djunit.jupiter.execution.parallel.enabled=false -Dtest.offheap.size=6g -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j - #mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j - echo "Running tests for cuda 11.2" - ${GITHUB_WORKSPACE}/change-cuda-versions.sh 11.2 - echo "Changed cuda to 11.2" - export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH" - echo "Updated path for 11.2" - echo "Installing jars for 11.2" - mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.2,:deeplearning4j-cuda-11.2,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests + echo "Running large tests for cuda 11.0" + mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j + #echo "Running tests for cuda 11.2" + #${GITHUB_WORKSPACE}/change-cuda-versions.sh 11.2 + #echo "Changed cuda to 11.2" + #export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH" + #echo "Updated path for 11.2" + #echo "Installing jars for 11.2" + #mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.2,:deeplearning4j-cuda-11.2,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests echo "Installed jars for 11.2, running smaller tests for cuda 11.2" - mvn -Djunit.jupiter.execution.parallel.enabled=false -Dtest.offheap.size=4g -Pcuda -Dlibnd4j.chip=cuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j + #mvn -Djunit.jupiter.execution.parallel.enabled=false -Dtest.offheap.size=4g -Pcuda -Dlibnd4j.chip=cuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j #echo "Running larger for cuda 11.2" #mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j