GPU test fixes
parent
6b681187e5
commit
2ef0d796e0
|
@ -39,8 +39,8 @@ jobs:
|
||||||
echo "Running tests for cuda 11.0"
|
echo "Running tests for cuda 11.0"
|
||||||
export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH"
|
export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH"
|
||||||
mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.0,:deeplearning4j-cuda-11.0,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests
|
mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.0,:deeplearning4j-cuda-11.0,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests
|
||||||
mvn -Dtest.offheap.size=2g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j
|
mvn -Dtest.offheap.size=6g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j
|
||||||
mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j
|
#mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j
|
||||||
echo "Running tests for cuda 11.2"
|
echo "Running tests for cuda 11.2"
|
||||||
${GITHUB_WORKSPACE}/change-cuda-versions.sh 11.2
|
${GITHUB_WORKSPACE}/change-cuda-versions.sh 11.2
|
||||||
echo "Changed cuda to 11.2"
|
echo "Changed cuda to 11.2"
|
||||||
|
@ -49,7 +49,7 @@ jobs:
|
||||||
echo "Installing jars for 11.2"
|
echo "Installing jars for 11.2"
|
||||||
mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.2,:deeplearning4j-cuda-11.2,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests
|
mvn -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.2,:deeplearning4j-cuda-11.2,:libnd4j" --also-make -Pcuda clean --batch-mode install -DskipTests
|
||||||
echo "Installed jars for 11.2, running smaller tests for cuda 11.2"
|
echo "Installed jars for 11.2, running smaller tests for cuda 11.2"
|
||||||
mvn -Dtest.offheap.size=2g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j
|
mvn -Dtest.offheap.size=4g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems" -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test --fail-never -rf :nd4j
|
||||||
echo "Running larger for cuda 11.2"
|
#echo "Running larger for cuda 11.2"
|
||||||
mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j
|
#mvn -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda -Dtest.offheap.size=14g -Dtest.heap.size=6g -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200 -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j
|
||||||
|
|
||||||
|
|
|
@ -56,8 +56,26 @@
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
|
<!-- For running unit tests with nd4j-cuda-8.0: "mvn clean test -P test-nd4j-cuda-8.0" -->
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -117,8 +117,26 @@
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
|
<!-- For running unit tests with nd4j-cuda-8.0: "mvn clean test -P test-nd4j-cuda-8.0" -->
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -74,8 +74,26 @@
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
|
<!-- For running unit tests with nd4j-cuda-8.0: "mvn clean test -P test-nd4j-cuda-8.0" -->
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -310,8 +310,26 @@
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
|
<!-- For running unit tests with nd4j-cuda-8.0: "mvn clean test -P test-nd4j-cuda-8.0" -->
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -127,8 +127,26 @@
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
|
<!-- For running unit tests with nd4j-cuda-8.0: "mvn clean test -P test-nd4j-cuda-8.0" -->
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -57,6 +57,28 @@
|
||||||
<activation>
|
<activation>
|
||||||
<activeByDefault>false</activeByDefault>
|
<activeByDefault>false</activeByDefault>
|
||||||
</activation>
|
</activation>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<version>${maven-surefire-plugin.version}</version>
|
||||||
|
<inherited>true</inherited>
|
||||||
|
<!-- GPU tests fail automatically on multi gpu-->
|
||||||
|
<configuration>
|
||||||
|
<environmentVariables>
|
||||||
|
<CUDA_VISIBLE_DEVICES>0</CUDA_VISIBLE_DEVICES>
|
||||||
|
</environmentVariables>
|
||||||
|
<parallelMavenExecution>
|
||||||
|
false
|
||||||
|
</parallelMavenExecution>
|
||||||
|
<trimStackTrace>false</trimStackTrace>
|
||||||
|
<useFile>false</useFile>
|
||||||
|
<reuseForks>false</reuseForks>
|
||||||
|
<forkCount>1</forkCount>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.deeplearning4j</groupId>
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
|
|
@ -193,8 +193,26 @@
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
|
<!-- For running unit tests with nd4j-cuda-8.0: "mvn clean test -P test-nd4j-cuda-8.0" -->
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -49,11 +49,31 @@
|
||||||
</modules>
|
</modules>
|
||||||
</profile>
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>nd4j-tests-cpu</id>
|
||||||
|
</profile>
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cpu</id>
|
<id>nd4j-tests-cpu</id>
|
||||||
</profile>
|
</profile>
|
||||||
<profile>
|
<profile>
|
||||||
<id>nd4j-tests-cuda</id>
|
<id>nd4j-tests-cuda</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>dl4j-test-resources</artifactId>
|
||||||
|
<version>${dl4j-test-resources.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-cuda-11.0</artifactId>
|
||||||
|
<version>${nd4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
Loading…
Reference in New Issue