GPU test fixes
This commit is contained in:
		
							parent
							
								
									6b681187e5
								
							
						
					
					
						commit
						2ef0d796e0
					
				| @ -39,8 +39,8 @@ jobs: | ||||
|           echo "Running tests for cuda 11.0" | ||||
|           export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH" | ||||
|           mvn  -Djavacpp.platform=linux-x86_64 -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.0,:deeplearning4j-cuda-11.0,:libnd4j"  --also-make  -Pcuda clean  --batch-mode install  -DskipTests | ||||
|           mvn  -Dtest.offheap.size=2g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda  -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems"  -DskipTestResourceEnforcement=true -Ptestresources  -Pintegration-tests  -Pnd4j-tests-cuda   clean test --fail-never -rf :nd4j | ||||
|           mvn  -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda  -Dtest.offheap.size=14g -Dtest.heap.size=6g  -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200  -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j | ||||
|           mvn  -Dtest.offheap.size=6g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda  -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems"  -DskipTestResourceEnforcement=true -Ptestresources  -Pintegration-tests  -Pnd4j-tests-cuda   clean test --fail-never -rf :nd4j | ||||
|           #mvn  -Pcuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda  -Dtest.offheap.size=14g -Dtest.heap.size=6g  -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200  -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j | ||||
|           echo "Running tests for cuda 11.2" | ||||
|           ${GITHUB_WORKSPACE}/change-cuda-versions.sh 11.2 | ||||
|           echo "Changed cuda to 11.2" | ||||
| @ -49,7 +49,7 @@ jobs: | ||||
|           echo "Installing jars for 11.2" | ||||
|           mvn  -Djavacpp.platform=linux-x86_64  -Dlibnd4j.chip=cuda -pl ":nd4j-cuda-11.2,:deeplearning4j-cuda-11.2,:libnd4j"  --also-make  -Pcuda clean  --batch-mode install  -DskipTests | ||||
|           echo "Installed jars for 11.2, running smaller tests for cuda 11.2" | ||||
|           mvn -Dtest.offheap.size=2g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems"  -DskipTestResourceEnforcement=true -Ptestresources  -Pintegration-tests  -Pnd4j-tests-cuda   clean test --fail-never -rf :nd4j | ||||
|           echo "Running larger for cuda 11.2" | ||||
|           mvn -Pcuda -Dlibnd4j.chip=cuda  -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda  -Dtest.offheap.size=14g -Dtest.heap.size=6g  -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200  -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j | ||||
|           mvn -Dtest.offheap.size=4g -Dtest.heap.size=2g -Pcuda -Dlibnd4j.chip=cuda -Dlibnd4j.chip=cuda -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -DexcludedGroups="long-running-tests, large-resources, distributed-systems"  -DskipTestResourceEnforcement=true -Ptestresources  -Pintegration-tests  -Pnd4j-tests-cuda   clean test --fail-never -rf :nd4j | ||||
|           #echo "Running larger for cuda 11.2" | ||||
|           #mvn -Pcuda -Dlibnd4j.chip=cuda  -Dorg.nd4j.strumpf.resource.dirs=${GITHUB_WORKSPACE}/resources -Dorg.nd4j.test.resources.cache.dir=${GITHUB_WORKSPACE}/cache -Dgroups="long-running-tests, large-resources, distributed-systems" -Ptestresources -Pnd4j-tests-cuda  -Dtest.offheap.size=14g -Dtest.heap.size=6g  -Dsurefire.parallel.forcedTimeout=200 -Dsurefire.parallel.timeout=200  -Dsurefire.timeout=200 -Dsurefire.exitTimeout=200 test --fail-never -rf :nd4j | ||||
| 
 | ||||
|  | ||||
| @ -56,8 +56,26 @@ | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <!-- For running unit tests with nd4j-cuda-11.0: "mvn clean test -P nd4j-tests-cuda" --> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
| @ -117,8 +117,26 @@ | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <!-- For running unit tests with nd4j-cuda-11.0: "mvn clean test -P nd4j-tests-cuda" --> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
| @ -74,8 +74,26 @@ | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <!-- For running unit tests with nd4j-cuda-11.0: "mvn clean test -P nd4j-tests-cuda" --> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
| @ -310,8 +310,26 @@ | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <!-- For running unit tests with nd4j-cuda-11.0: "mvn clean test -P nd4j-tests-cuda" --> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
| @ -127,8 +127,26 @@ | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <!-- For running unit tests with nd4j-cuda-11.0: "mvn clean test -P nd4j-tests-cuda" --> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
| @ -20,8 +20,8 @@ | ||||
|   --> | ||||
| 
 | ||||
| <project xmlns="http://maven.apache.org/POM/4.0.0" | ||||
|     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
|     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||||
|          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
|          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||||
| 
 | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
| 
 | ||||
| @ -57,6 +57,28 @@ | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <build> | ||||
|                 <plugins> | ||||
|                     <plugin> | ||||
|                         <artifactId>maven-surefire-plugin</artifactId> | ||||
|                         <version>${maven-surefire-plugin.version}</version> | ||||
|                         <inherited>true</inherited> | ||||
|                         <!-- GPU tests fail automatically on multi-GPU machines, so only device 0 is made visible to the tests. --> | ||||
|                         <configuration> | ||||
|                             <environmentVariables> | ||||
|                                 <CUDA_VISIBLE_DEVICES>0</CUDA_VISIBLE_DEVICES> | ||||
|                             </environmentVariables> | ||||
|                             <parallelMavenExecution> | ||||
|                                 false | ||||
|                             </parallelMavenExecution> | ||||
|                             <trimStackTrace>false</trimStackTrace> | ||||
|                             <useFile>false</useFile> | ||||
|                             <reuseForks>false</reuseForks> | ||||
|                             <forkCount>1</forkCount> | ||||
|                         </configuration> | ||||
|                     </plugin> | ||||
|                 </plugins> | ||||
|             </build> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|  | ||||
| @ -193,8 +193,26 @@ | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <!-- For running unit tests with nd4j-cuda-11.0: "mvn clean test -P nd4j-tests-cuda" --> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
| @ -20,8 +20,8 @@ | ||||
|   --> | ||||
| 
 | ||||
| <project xmlns="http://maven.apache.org/POM/4.0.0" | ||||
|     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
|     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||||
|          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
|          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||||
| 
 | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
| 
 | ||||
| @ -49,11 +49,31 @@ | ||||
|             </modules> | ||||
|         </profile> | ||||
| 
 | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cpu</id> | ||||
|         </profile> | ||||
|         <profile> | ||||
|             <id>nd4j-tests-cuda</id> | ||||
|             <activation> | ||||
|                 <activeByDefault>false</activeByDefault> | ||||
|             </activation> | ||||
|             <dependencies> | ||||
|                 <dependency> | ||||
|                     <groupId>org.deeplearning4j</groupId> | ||||
|                     <artifactId>dl4j-test-resources</artifactId> | ||||
|                     <version>${dl4j-test-resources.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|                 <dependency> | ||||
|                     <groupId>org.nd4j</groupId> | ||||
|                     <artifactId>nd4j-cuda-11.0</artifactId> | ||||
|                     <version>${nd4j.version}</version> | ||||
|                     <scope>test</scope> | ||||
|                 </dependency> | ||||
|             </dependencies> | ||||
|         </profile> | ||||
|     </profiles> | ||||
| </project> | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user