2021-02-01 21:31:45 +09:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/* ******************************************************************************
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 * 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-06 15:21:15 +03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								 * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  This  program  and  the  accompanying  materials  are  made  available  under  the 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  terms  of  the  Apache  License ,  Version  2.0  which  is  available  at 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 * https://www.apache.org/licenses/LICENSE-2.0.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 * 
							 
						 
					
						
							
								
									
										
										
										
											2021-02-01 21:31:45 +09:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								 *   See  the  NOTICE  file  distributed  with  this  work  for  additional 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *   information  regarding  copyright  ownership . 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-06 15:21:15 +03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								 *  Unless  required  by  applicable  law  or  agreed  to  in  writing ,  software 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  distributed  under  the  License  is  distributed  on  an  " AS IS "  BASIS ,  WITHOUT 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  WARRANTIES  OR  CONDITIONS  OF  ANY  KIND ,  either  express  or  implied .  See  the 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  License  for  the  specific  language  governing  permissions  and  limitations 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  under  the  License . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 * SPDX-License-Identifier: Apache-2.0
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								//
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								//  @author raver119@gmail.com
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								//  @author Yurii Shyrma
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-03-02 12:49:41 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  <system/op_boilerplate.h> 
  
						 
					
						
							
								
									
										
										
										
											2019-06-06 15:21:15 +03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								# if NOT_EXCLUDED(OP_conv1d) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  <ops/declarable/DeclarableOp.h> 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  <ops/declarable/CustomOperations.h> 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  <ops/declarable/helpers/convolutions.h> 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-03-02 12:49:41 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								namespace  sd  {  
						 
					
						
							
								
									
										
											 
										
											
												Development updates (#9098)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Fix L2NormalizeVertex and eclipse#9054 (#513)
* update
* Fix L2NormalizeVertex
Fix eclipse#9054
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
* Python GIL overhaul (#517)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Ag pythongiloverhaul (#518)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
* Re update python4j
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Bump formatter-maven-plugin from 2.0.0 to 2.12.1 (#505)
Bumps [formatter-maven-plugin](https://github.com/revelc/formatter-maven-plugin) from 2.0.0 to 2.12.1.
- [Release notes](https://github.com/revelc/formatter-maven-plugin/releases)
- [Changelog](https://github.com/revelc/formatter-maven-plugin/blob/formatter-maven-plugin-2.12.1/CHANGELOG.md)
- [Commits](https://github.com/revelc/formatter-maven-plugin/compare/formatter-maven-plugin-2.0.0...formatter-maven-plugin-2.12.1)
Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
* Ag fix9060 (#519)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec code cleanup (#9071)
* removed unnecessary semicolons
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Use standard charset object
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed unused imports
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* WIP: Fix Conv1d causal case
* Add initial tests
* Update Conv1d tests to be a bit more robust
* Remove redundant test
* Reset from master
* Remove cuda definition (left over)
* Update rl4j again
* Update pom.xml
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Fixes 9061 (#521)
* Get rid of edge case in validation
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Using embedded copying of an array instead of manual (#9073)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec bulk operation (#9075)
* Bulk operation can be used instead of iteration inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Redundant 'Collection.addAll()' call inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed infinite loop (#9076)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Revert "Merge eclipse changes" (#526)
* Revert rl4j to 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182 (#527)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Fix L2NormalizeVertex and eclipse#9054 (#513)
* update
* Fix L2NormalizeVertex
Fix eclipse#9054
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
* Python GIL overhaul (#517)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id for python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Ag pythongiloverhaul (#518)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id for python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
* Re update python4j
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Bump formatter-maven-plugin from 2.0.0 to 2.12.1 (#505)
Bumps [formatter-maven-plugin](https://github.com/revelc/formatter-maven-plugin) from 2.0.0 to 2.12.1.
- [Release notes](https://github.com/revelc/formatter-maven-plugin/releases)
- [Changelog](https://github.com/revelc/formatter-maven-plugin/blob/formatter-maven-plugin-2.12.1/CHANGELOG.md)
- [Commits](https://github.com/revelc/formatter-maven-plugin/compare/formatter-maven-plugin-2.0.0...formatter-maven-plugin-2.12.1)
Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
* Ag fix9060 (#519)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id for python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec code cleanup (#9071)
* removed unnecessary semicolons
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Use standard charset object
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed unused imports
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* WIP: Fix Conv1d causal case
* Add initial tests
* Update Conv1d tests to be a bit more robust
* Remove redundant test
* Reset from master
* Remove cuda definition (left over)
* Update rl4j again
* Update pom.xml
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Fixes 9061 (#521)
* Get rid of edge case in validation
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Using embedded copying of an array instead of manual (#9073)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec bulk operation (#9075)
* Bulk operation can be used instead of iteration inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Redundant 'Collection.addAll()' call inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed infinite loop (#9076)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
(cherry picked from commit 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182)
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
(cherry picked from commit 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182)
* Revert rl4j to 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182
* Delete jnind4jaurora.cpp
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
* RL4J: Add partial support for RNN (#514)
* Added partial recurrent support
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Made sure the RNN always see the observation in EpsGreedy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Converted all line endings of rl4j-core to LF (#530)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* ND4J: Bundle configuration files required by AOT compilation with GraalVM (#529)
* ND4J: Bundle configuration files required by AOT compilation with GraalVM
* Update dependencies to just released JavaCPP and JavaCV 1.5.4
* Ag fixtests 831 (#523)
* Update UnderSamplingPreProcessorTest.java
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id for python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Add proper annotation
* Fix classcast exception for recurrent model import case
* Update keras import to allow for proper handling of changing NCHW -> NHWC mid later
* Add output to test to ensure proper activation
* Fixes computation graphs to allow dimension ordering to change mid graph
* Add NHWC support for keras import.
* Update tests to pass /ignore out of date ones
* Add  multi RNNDataformat  support
* Update tests to make more pass.
Updates some tests to be correct, double checked existing models and updated reasons they may or may  not fail.
* Add back old default values to ensure legacy serialization works.  Replace null value default with sentinel value for default value overridden.
* Update layers to preserve changed values
* Exclude default value overridden from comparison
* Fix conv1d import (no permute weights anymore)
* Update KerasConvolution1D.java
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* GPU compute capability  (#532)
* - GPU cpu capability flags
- CUDA MAJOR VERSION provided by cmake
Signed-off-by: AbdelRauf <rauf@konduit.ai>
* Readme
Signed-off-by: AbdelRauf <rauf@konduit.ai>
* Readme
Signed-off-by: AbdelRauf <rauf@konduit.ai>
* RL4J: Add new network implementation to help support recurrent networks (#531)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
Co-authored-by: Abdelrauf <qwr@live.ru>
											 
										 
										
											2020-09-23 19:11:29 +09:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    namespace  ops   { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        CUSTOM_OP_IMPL ( conv1d ,  2 ,  1 ,  false ,  0 ,  5 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  input    =  INPUT_VARIABLE ( 0 ) ;                                     // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  weights  =  INPUT_VARIABLE ( 1 ) ;                                     // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  bias     =  block . width ( )  >  2  ?  INPUT_VARIABLE ( 2 )  :  nullptr ;       // [oC]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  output   =  OUTPUT_NULLIFIED ( 0 ) ;                                    // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  kW  =  INT_ARG ( 0 )  >  0  ?  INT_ARG ( 0 )  :  static_cast < int > ( weights - > sizeAt ( 0 ) ) ; // filter(kernel) width
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  sW  =  INT_ARG ( 1 ) ;                                                         // strides width
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  pW  =  INT_ARG ( 2 ) ;                                                         // paddings width
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  dW  =  INT_ARG ( 3 ) ;                                                         // dilations width
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  paddingMode  =  INT_ARG ( 4 ) ;                                                // 0-VALID, 1-SAME, 2-CAUSAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  isNCW        =  block . getIArguments ( ) - > size ( )  >  5  ?  ! INT_ARG ( 5 )  :  1 ;       // INT_ARG(4): 0-NCW,  1-NWC
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  wFormat  =  block . getIArguments ( ) - > size ( )  >  6  ?  INT_ARG ( 6 )  :  0 ;            // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            const  int  rank  =  3 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            REQUIRE_TRUE ( input - > rankOf ( )    = =  rank ,  0 ,  " CUSTOM CONV1D OP: rank of input array must be equal to %i, but got %i instead ! " ,  rank ,  input - > rankOf ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            REQUIRE_TRUE ( weights - > rankOf ( )  = =  rank ,  0 ,  " CUSTOM CONV1D OP: rank of weights array must be equal to %i, but got %i instead ! " ,  rank ,  weights - > rankOf ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  indIOioC ,  indIiW ,  indWoC ( 0  = =  wFormat  ?  2  :  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if ( ! isNCW )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                indIOioC  =  2 ;  indIiW  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                indIOioC  =  1 ;  indIiW  =  2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  bS  =  input - > sizeAt ( 0 ) ;                         // batch size
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  iW  =  input - > sizeAt ( indIiW ) ;                    // input width
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  iC  =  input - > sizeAt ( indIOioC ) ;                  // input channels
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            int  oC  =  weights - > sizeAt ( indWoC ) ;                  // output channels
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            std : : vector < Nd4jLong >  expectedWeightsShape  =  0  = =  wFormat  ?  std : : vector < Nd4jLong > ( { kW ,  iC ,  oC } )  :  ( 1  = =  wFormat  ?  std : : vector < Nd4jLong > ( { oC ,  iC ,  kW } )  :  std : : vector < Nd4jLong > ( { oC ,  kW ,  iC } ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            REQUIRE_TRUE ( weights - > isSameShape ( expectedWeightsShape ) ,  0 ,  " CUSTOM CONV1D OP: wrong shape of weights array, expected is %s, but got %s instead ! " ,  ShapeUtils : : shapeAsString ( expectedWeightsShape ) . c_str ( ) ,  ShapeUtils : : shapeAsString ( weights ) . c_str ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  ( bias ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            REQUIRE_TRUE ( bias - > rankOf ( )  < =  2  & &  oC  = =  bias - > lengthOf ( ) ,  0 ,  " CUSTOM CONV1D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead ! " ,  oC ,  bias - > rankOf ( ) ,  bias - > lengthOf ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            std : : vector < Nd4jLong >  reshapeForInput ,  reshapeForOutput ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if ( ! isNCW )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                reshapeForInput    =  { input - > sizeAt ( 0 ) ,  1 ,  input - > sizeAt ( 1 ) ,  input - > sizeAt ( 2 ) } ;                   // [bS, iW, iC] -> [bS, 1, iW, iC]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                reshapeForOutput   =  { output - > sizeAt ( 0 ) ,  1 ,  output - > sizeAt ( 1 ) ,  output - > sizeAt ( 2 ) } ;                // [bS, oW, oC] -> [bS, 1, oW, oC]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                reshapeForInput    =  { input - > sizeAt ( 0 ) ,   input - > sizeAt ( 1 ) ,   1 ,  input - > sizeAt ( 2 ) } ;                 // [bS, iC, iW] -> [bS, iC, 1, iW]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                reshapeForOutput   =  { output - > sizeAt ( 0 ) ,  output - > sizeAt ( 1 ) ,  1 ,  output - > sizeAt ( 2 ) } ;                // [bS, oC, oW] -> [bS, oC, 1, oW]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  inputReshaped    =  input   - > reshape ( input - > ordering ( ) ,    reshapeForInput ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  outputReshaped   =  output  - > reshape ( output - > ordering ( ) ,   reshapeForOutput ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            auto  weightsReshaped  =  weights - > reshape ( weights - > ordering ( ) ,  { 1 ,  weights - > sizeAt ( 0 ) ,  weights - > sizeAt ( 1 ) ,  weights - > sizeAt ( 2 ) } ) ;    // [kW, iC, oC] -> [1, kW, iC, oC]
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            sd : : ops : : conv2d  conv2d ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            const  Nd4jStatus  status  =  conv2d . execute ( { & inputReshaped ,  & weightsReshaped ,  bias } ,  { & outputReshaped } ,  { } ,  { 1 , kW ,   1 , sW ,   0 , pW ,   1 , dW ,   paddingMode ,  ! isNCW ,  wFormat } ,  { } ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  ( status  ! =  ND4J_STATUS_OK ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                return  status ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            // ConvolutionUtils::conv2d(block, &inputReshaped, &weightsReshaped, bias, &outputReshaped, 1,kW,  1,sW,  0,pW,  1,dW,  paddingMode, isNCW, wFormat);
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  Status : : OK ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        // Shape function of the conv1d custom op: derives the rank-3 output shape info
								        // from the input (index 0) and weights (index 1) shape infos, plus an optional
								        // bias (index 2) that is only validated here.
								        //
								        // Integer arguments:
								        //   0 - kernel width (kW); when <= 0 it is taken from dimension 0 of the weights
								        //   1 - stride width (sW)
								        //   2 - padding width (pW)
								        //   3 - dilation width (dW)
								        //   4 - padding mode, 0-VALID, 1-SAME
								        //   5 - (optional) data format, 1-NWC, 0-NCW; NCW is used when the argument is absent
								        //   6 - (optional) weights format, 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]
								        //
								        // Returns a shape list holding one shape: [bS, oC, oW] for NCW or [bS, oW, oC] for NWC.
								        DECLARE_SHAPE_FN(conv1d) {

								            auto inputShapeInfo   = inputShape->at(0);
								            auto weightsShapeInfo = inputShape->at(1);
								            Nd4jLong const* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr;

								            // NOTE(review): the fallback reads weights dimension 0, which holds kW only for
								            // wFormat == 0 according to the layouts listed below - confirm for formats 1 and 2.
								            int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(shape::sizeAt(weightsShapeInfo, 0)); // filter(kernel) width
								            int sW = INT_ARG(1);                                                        // strides width
								            int pW = INT_ARG(2);                                                        // paddings width
								            int dW = INT_ARG(3);                                                        // dilations width
								            int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME
								            int isNCW   = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;           // INT_ARG(5): 1-NWC, 0-NCW
								            int wFormat = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;            // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]

								            // positions of the channel / width dimensions inside input and weights
								            int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
								            if (!isNCW) {
								                indIOioC = 2; indIiW = 1;       // NWC: [bS, iW, iC]
								            }
								            else {
								                indIOioC = 1; indIiW = 2;       // NCW: [bS, iC, iW]
								            }

								            const int rank = 3;
								            // report the actual rank (element 0 of the shape info), not the shape-info pointer
								            REQUIRE_TRUE(inputShapeInfo[0]   == rank, 0, "CUSTOM CONV1D OP: rank of input array must be equal to %i, but got %i instead !", rank, static_cast<int>(inputShapeInfo[0]));
								            REQUIRE_TRUE(weightsShapeInfo[0] == rank, 0, "CUSTOM CONV1D OP: rank of weights array must be equal to %i, but got %i instead !", rank, static_cast<int>(weightsShapeInfo[0]));

								            // shape info layout: element 0 is the rank, dimension sizes start at element 1
								            int bS = inputShapeInfo[1];                         // batch size
								            int iW = inputShapeInfo[indIiW + 1];                // input width
								            int iC = inputShapeInfo[indIOioC + 1];              // input channels
								            int oC = weightsShapeInfo[indWoC + 1];              // output channels

								            // kept for the weights-shape validation below, which is currently disabled
								            std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
								            //REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV1D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
								            if (biasShapeInfo) {
								                // values are narrowed to int so that they match the %i placeholders
								                REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM CONV1D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, static_cast<int>(biasShapeInfo[0]), static_cast<int>(shape::length(biasShapeInfo)));
								            }

								            int oH, oW;                                         // output height, width
								            // the 1d case is evaluated as a 2d one with unit height
								            ConvolutionUtils::calcOutSizePool2D(oH, oW,  1, kW,  1, sW,  0, pW,  1, dW,  1, iW, paddingMode);

								            Nd4jLong* outputShapeInfo = nullptr;
								            ALLOCATE(outputShapeInfo, block.getWorkspace(), shape::shapeInfoLength(rank), Nd4jLong);

								            outputShapeInfo[0] = rank;
								            outputShapeInfo[1] = bS;

								            if (isNCW) {
								                outputShapeInfo[2] = oC;
								                outputShapeInfo[3] = oW;
								            } else {
								                outputShapeInfo[2] = oW;
								                outputShapeInfo[3] = oC;
								            }

								            // strides, data type and ordering of the output are taken from the weights
								            ShapeUtils::updateStridesAndType(outputShapeInfo, weightsShapeInfo, shape::order(weightsShapeInfo));

								            return SHAPELIST(CONSTANT(outputShapeInfo));
								        }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        // Data type constraints of the conv1d op: input 0 may be any float or integer
								        // type as well as QINT8 / QINT16, inputs 1 and 2 and output 0 must be floats.
								        DECLARE_TYPES(conv1d) {
								            auto descriptor = getOpDescriptor();

								            descriptor
								                ->setAllowedInputTypes(0, {ALL_FLOATS, ALL_INTS, DataType::QINT8, DataType::QINT16})
								                ->setAllowedInputTypes(1, {ALL_FLOATS})
								                ->setAllowedInputTypes(2, {ALL_FLOATS})
								                ->setAllowedOutputTypes(0, {ALL_FLOATS});
								        }
							 
						 
					
						
							
								
									
										
										
										
											2019-06-06 15:21:15 +03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								//////////////////////////////////////////////////////////////////////////
  
						 
					
						
							
								
									
										
											 
										
											
												Development updates (#9098)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Fix L2NormalizeVertex and eclipse#9054 (#513)
* update
* Fix L2NormalizeVertex
Fix eclipse#9054
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
* Python GIL overhaul (#517)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Ag pythongiloverhaul (#518)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
* Re update python4j
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Bump formatter-maven-plugin from 2.0.0 to 2.12.1 (#505)
Bumps [formatter-maven-plugin](https://github.com/revelc/formatter-maven-plugin) from 2.0.0 to 2.12.1.
- [Release notes](https://github.com/revelc/formatter-maven-plugin/releases)
- [Changelog](https://github.com/revelc/formatter-maven-plugin/blob/formatter-maven-plugin-2.12.1/CHANGELOG.md)
- [Commits](https://github.com/revelc/formatter-maven-plugin/compare/formatter-maven-plugin-2.0.0...formatter-maven-plugin-2.12.1)
Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
* Ag fix9060 (#519)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec code cleanup (#9071)
* removed unnecessary semicolons
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Use standard charset object
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed unused imports
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* WIP: Fix Conv1d causal case
* Add initial tests
* Update Conv1d tests to be a bit more robust
* Remove redundant test
* Reset from master
* Remove cuda definition (left over)
* Update rl4j again
* Update pom.xml
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Fixes 9061 (#521)
* Get rid of edge case in validation
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Using embedded copying of an array instead of manual (#9073)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec bulk operation (#9075)
* Bulk operation can be used instead of iteration inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Redundant 'Collection.addAll()' call inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed infinite loop (#9076)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Revert "Merge eclipse changes" (#526)
* Revert rl4j to 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182 (#527)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Fix L2NormalizeVertex and eclipse#9054 (#513)
* update
* Fix L2NormalizeVertex
Fix eclipse#9054
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
* Python GIL overhaul (#517)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Ag pythongiloverhaul (#518)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR
* Update cherry pick again from last master revision.
* Re update python4j
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Bump formatter-maven-plugin from 2.0.0 to 2.12.1 (#505)
Bumps [formatter-maven-plugin](https://github.com/revelc/formatter-maven-plugin) from 2.0.0 to 2.12.1.
- [Release notes](https://github.com/revelc/formatter-maven-plugin/releases)
- [Changelog](https://github.com/revelc/formatter-maven-plugin/blob/formatter-maven-plugin-2.12.1/CHANGELOG.md)
- [Commits](https://github.com/revelc/formatter-maven-plugin/compare/formatter-maven-plugin-2.0.0...formatter-maven-plugin-2.12.1)
Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
* Ag fix9060 (#519)
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Removed dead code (#9057)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* performance improvement (#9055)
* performance improvement
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* revert some changes
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec code cleanup (#9071)
* removed unnecessary semicolons
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Use standard charset object
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed unused imports
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* WIP: Fix Conv1d causal case
* Add initial tests
* Update Conv1d tests to be a bit more robust
* Remove redundant test
* Reset from master
* Remove cuda definition (left over)
* Update rl4j again
* Update pom.xml
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* Fixes 9061 (#521)
* Get rid of edge case in validation
* Added support for the archunit (#9062)
* Added support for the archunit
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Updated pom files
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Using embedded copying of an array instead of manual (#9073)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Datavec bulk operation (#9075)
* Bulk operation can be used instead of iteration inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Redundant 'Collection.addAll()' call inspection
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed infinite loop (#9076)
Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
(cherry picked from commit 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182)
* RL4J: Add async training and advantage actor-critic (#507)
* Added async training & Advantage Actor Critic
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Fix compiler error
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Renamed ActorCriticPolicy back to ACPolicy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
(cherry picked from commit 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182)
* Revert rl4j to 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182
* Delete jnind4jaurora.cpp
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
* RL4J: Add partial support for RNN (#514)
* Added partial recurrent support
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Made sure the RNN always see the observation in EpsGreedy
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Converted all line endings of rl4j-core to LF (#530)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* ND4J: Bundle configuration files required by AOT compilation with GraalVM (#529)
* ND4J: Bundle configuration files required by AOT compilation with GraalVM
* Update dependencies to just released JavaCPP and JavaCV 1.5.4
* Ag fixtests 831 (#523)
* Update UnderSamplingPreProcessorTest.java
* Development updates (#9053)
* RL4J: Add generic update rule (#502)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
* Shyrma reduce (#481)
* - start working on improving of cpu legacy code for reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving legacy loops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - still working on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further work on improving reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - testing speed run of new reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - working on improvement of default loop for reduce op
Signed-off-by: Yurii <iuriish@yahoo.com>
* - update signatures of stuff which calls reduce ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - make corrections in cuda reduce kernels
Signed-off-by: Yurii <iuriish@yahoo.com>
* - change loop for default case in broadcast legacy ops
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment some shape stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - comment unnecessary prints in RNGtests
Signed-off-by: Yurii <iuriish@yahoo.com>
* - finish to resolve conflicts after master has been merged
Signed-off-by: Yurii <iuriish@yahoo.com>
* - get rid of some compilation mistakes of cuda stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor changes
Signed-off-by: Yurii <iuriish@yahoo.com>
* - further search for bug causing crash on java test
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add scalar case in reduce_ ... exec stuff
Signed-off-by: Yurii <iuriish@yahoo.com>
* - minor corrections in NativeOps.cu
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add switch to scalar case execReduceXD functions
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
* - correct cuda mirrorPad
Signed-off-by: Yurii <iuriish@yahoo.com>
* - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce
Signed-off-by: Yurii <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
* Add support for CUDA 11.0 (#492)
* Add support for CUDA 11.0
* libnd4j tweaks for CUDA 11
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* bindings update, again?
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy
* update API to match CUDA 8
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* * Update version of JavaCPP Presets for CPython
* C++ updated for cuDNN 8.0
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one more test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* 128-bit alignment for workspaces
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* change seed in 1 test
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Fix dependency duplication in python4j-parent pom
* Fix group id for in python4j-numpy
* few tests tweaked
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* Remove macosx-x86_64-gpu from nd4j-tests-tensorflow
* few minor tweaks for IndexReduce
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
* one test removed
Signed-off-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: raver119@gmail.com <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* Development updates (#9064)
 * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL
Signed-off-by: Samuel Audet <samuel.audet@gmail.com>
* Add proper annotation
* Fix classcast exception for recurrent model import case
* Update keras import to allow for proper handling of changing NCHW -> NHWC mid later
* Add output to test to ensure proper activation
* Fixes computation graphs to allow dimension ordering to change mid graph
* Add NHWC support for keras import.
* Update tests to pass / ignore out-of-date ones
* Add  multi RNNDataformat  support
* Update tests to make more pass.
Updates some tests to be correct, double checked existing models and updated reasons they may or may  not fail.
* Add back old default values to ensure legacy serialization works.  Replace null value default with sentinel value for default value overridden.
* Update layers to preserve changed values
* Exclude default value overridden from comparison
* Fix conv1d import (no permute weights anymore)
* Update KerasConvolution1D.java
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
* GPU compute capability  (#532)
* - GPU cpu capability flags
- CUDA MAJOR VERSION provided by cmake
Signed-off-by: AbdelRauf <rauf@konduit.ai>
* Readme
Signed-off-by: AbdelRauf <rauf@konduit.ai>
* Readme
Signed-off-by: AbdelRauf <rauf@konduit.ai>
* RL4J: Add new network implementation to help support recurrent networks (#531)
Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com>
Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com>
Co-authored-by: Yurii Shyrma <iuriish@yahoo.com>
Co-authored-by: raver119 <raver119@gmail.com>
Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com>
Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com>
Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
Co-authored-by: Abdelrauf <qwr@live.ru>
											 
										 
										
											2020-09-23 19:11:29 +09:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        //////////////////////////////////////////////////////////////////////////
								        // conv1d_bp: backward pass of the 1D convolution.
								        //
								        // The 3D arrays are reshaped (without copying the gradient outputs) to 4D by
								        // inserting a unit "height" axis, and the work is delegated to sd::ops::conv2d_bp.
								        //
								        // Inputs : input, weights, [bias], gradO (bias is present only when block.width() > 3)
								        // Outputs: gradI, gradW, [gradB]
								        // IArgs  : 0 - kW (<= 0 means "take it from weights"), 1 - sW, 2 - pW, 3 - dW,
								        //          4 - paddingMode, [5 - data format], [6 - weights format]
								        // Returns the status of the nested conv2d_bp call, or Status::OK() on success.
								        CUSTOM_OP_IMPL(conv1d_bp, 3, 2, false, 0, 5) {

								            auto input   = INPUT_VARIABLE(0);                                           // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW)
								            auto weights = INPUT_VARIABLE(1);                                           // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
								            auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;             // [oC]
								            auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);   // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next

								            auto gradI = OUTPUT_NULLIFIED(0);                                           // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW), epsilon
								            auto gradW = OUTPUT_NULLIFIED(1);                                           // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
								            auto gradB = block.width() > 3 ? OUTPUT_NULLIFIED(2) : nullptr;             // [oC]

								            int sW          = INT_ARG(1);                                               // strides width
								            int pW          = INT_ARG(2);                                               // paddings width
								            int dW          = INT_ARG(3);                                               // dilations width
								            int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME, 2-CAUSAL
								            int isNCW       = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;      // INT_ARG(5): 1-NWC, 0-NCW (NCW when absent)
								            int wFormat     = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;       // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]

								            const int rank = 3;
								            REQUIRE_TRUE(input->rankOf()   == rank, 0, "CUSTOM CONV1D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, input->rankOf());
								            REQUIRE_TRUE(weights->rankOf() == rank, 0, "CUSTOM CONV1D_BP OP: rank of weights array must be equal to %i, but got %i instead !", rank, weights->rankOf());
								            REQUIRE_TRUE(gradO->rankOf()   == rank, 0, "CUSTOM CONV1D_BP OP: rank of output gradients (next epsilon) array must be equal to %i, but got %i instead !", rank, gradO->rankOf());

								            // The kernel-width axis of the weights depends on the weights format, so the
								            // fallback (INT_ARG(0) <= 0) must look at the matching dimension. It is read
								            // only after the rank of weights has been validated above.
								            const int indWkW = 0 == wFormat ? 0 : (1 == wFormat ? 2 : 1);
								            int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(weights->sizeAt(indWkW));   // filter(kernel) width

								            int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
								            if (!isNCW) {
								                indIOioC = 2; indIiW = 1;
								            }
								            else {
								                indIOioC = 1; indIiW = 2;
								            }

								            const int bS = input->sizeAt(0);                // batch size
								            const int iW = input->sizeAt(indIiW);           // input width
								            const int iC = input->sizeAt(indIOioC);         // input channels
								            const int oC = weights->sizeAt(indWoC);         // output channels

								            int trueoH, trueoW;                             // true output height, width
								            ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, 1, kW, 1, sW, 0, pW, 1, dW, 1, iW, paddingMode);

								            std::vector<Nd4jLong> expectedGradOShape   = ShapeUtils::composeShapeUsingDimsAndIdx({bS, oC, trueoW, 0, indIOioC, indIiW});
								            std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
								            REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
								            REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
								            if (bias) {
								                // lengthOf() is a 64-bit value: narrow it explicitly so that it matches the %i specifier
								                REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM CONV1D_BP OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), static_cast<int>(bias->lengthOf()));
								            }

								            // 3D -> 4D shapes: a unit height axis is inserted next to the width axis
								            std::vector<Nd4jLong> reshapeForInput, reshapeForGradO, reshapeForWeights;
								            if (!isNCW) {
								                reshapeForInput = {input->sizeAt(0), 1, input->sizeAt(1), input->sizeAt(2)};    // [bS, iW, iC] -> [bS, 1, iW, iC]
								                reshapeForGradO = {gradO->sizeAt(0), 1, gradO->sizeAt(1), gradO->sizeAt(2)};    // [bS, oW, oC] -> [bS, 1, oW, oC]
								            }
								            else {
								                reshapeForInput = {input->sizeAt(0), input->sizeAt(1), 1, input->sizeAt(2)};    // [bS, iC, iW] -> [bS, iC, 1, iW]
								                reshapeForGradO = {gradO->sizeAt(0), gradO->sizeAt(1), 1, gradO->sizeAt(2)};    // [bS, oC, oW] -> [bS, oC, 1, oW]
								            }

								            // The unit kH axis has to be placed where conv2d_bp expects it for the given
								            // weights format, otherwise its weights-shape validation cannot succeed for
								            // wFormat 1 and 2 (conv2d layouts: [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]).
								            if (0 == wFormat)
								                reshapeForWeights = {1, weights->sizeAt(0), weights->sizeAt(1), weights->sizeAt(2)};    // [kW, iC, oC] -> [1, kW, iC, oC]
								            else if (1 == wFormat)
								                reshapeForWeights = {weights->sizeAt(0), weights->sizeAt(1), 1, weights->sizeAt(2)};    // [oC, iC, kW] -> [oC, iC, 1, kW]
								            else
								                reshapeForWeights = {weights->sizeAt(0), 1, weights->sizeAt(1), weights->sizeAt(2)};    // [oC, kW, iC] -> [oC, 1, kW, iC]

								            auto inputReshaped   = input  ->reshape(input->ordering(),   reshapeForInput);
								            auto gradIReshaped   = gradI  ->reshape(gradI->ordering(),   reshapeForInput, false);      // no copy: conv2d_bp writes into gradI
								            auto gradOReshaped   = gradO  ->reshape(gradO->ordering(),   reshapeForGradO);
								            auto weightsReshaped = weights->reshape(weights->ordering(), reshapeForWeights);
								            auto gradWReshaped   = gradW  ->reshape(gradW->ordering(),   reshapeForWeights, false);    // no copy: conv2d_bp writes into gradW

								            sd::ops::conv2d_bp conv2dBP;
								            auto status = conv2dBP.execute({&inputReshaped, &weightsReshaped, bias, &gradOReshaped}, {&gradIReshaped, &gradWReshaped, gradB}, {}, {1, kW,  1, sW,  0, pW,  1, dW,  paddingMode, !isNCW, wFormat}, {});
								            if (status != ND4J_STATUS_OK)
								                return status;

								            // ConvolutionUtils::conv2dBP(block, &inputReshaped, &weightsReshaped, bias, &gradOReshaped, &gradIReshaped, &gradWReshaped, gradB, 1,kW,  1,sW,  0,pW,  1,dW,  paddingMode, isNCW, wFormat);

								            return Status::OK();
								        }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        //////////////////////////////////////////////////////////////////////////
								        // Shape function of conv1d_bp.
								        //
								        // Validates ranks and shapes of input, weights, gradO (and bias when
								        // block.width() > 3) and returns the output shapes: gradI takes the shape of
								        // input, gradW the shape of weights and, when bias is present, gradB the shape
								        // of bias. All outputs take their data type from gradO.
								        DECLARE_SHAPE_FN(conv1d_bp) {

								            auto inputShapeInfo   = inputShape->at(0);                                                              // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW)
								            auto weightsShapeInfo = inputShape->at(1);                                                              // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
								            Nd4jLong const* biasShapeInfo  = block.width() > 3 ? inputShape->at(2) : nullptr;                       // [oC]
								            Nd4jLong const* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2);             // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next

								            // shapeInfo[0] holds the rank as a 64-bit value: narrow it explicitly so that it matches the %i specifier
								            const int rank = 3;
								            REQUIRE_TRUE(inputShapeInfo[0]   == rank, 0, "CUSTOM CONV1D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, static_cast<int>(inputShapeInfo[0]));
								            REQUIRE_TRUE(weightsShapeInfo[0] == rank, 0, "CUSTOM CONV1D_BP OP: rank of weights array must be equal to %i, but got %i instead !", rank, static_cast<int>(weightsShapeInfo[0]));
								            REQUIRE_TRUE(gradOShapeInfo[0]   == rank, 0, "CUSTOM CONV1D_BP OP: rank of output gradients (next epsilon) array must be equal to %i, but got %i instead !", rank, static_cast<int>(gradOShapeInfo[0]));

								            int sW          = INT_ARG(1);                                               // strides width
								            int pW          = INT_ARG(2);                                               // paddings width
								            int dW          = INT_ARG(3);                                               // dilations width
								            int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME, 2-CAUSAL
								            int isNCW       = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;      // INT_ARG(5): 1-NWC, 0-NCW (NCW when absent)
								            int wFormat     = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;       // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]

								            // The kernel-width axis of the weights depends on the weights format, so the
								            // fallback (INT_ARG(0) <= 0) must look at the matching dimension. It is read
								            // only after the rank of weights has been validated above.
								            const int indWkW = 0 == wFormat ? 0 : (1 == wFormat ? 2 : 1);
								            int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(shape::sizeAt(weightsShapeInfo, indWkW));   // filter(kernel) width

								            int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
								            if (!isNCW) {
								                indIOioC = 2; indIiW = 1;
								            }
								            else {
								                indIOioC = 1; indIiW = 2;
								            }

								            // shapeInfo stores the dimensions starting at index 1, hence the "+ 1"
								            const int bS = inputShapeInfo[1];                       // batch size
								            const int iW = inputShapeInfo[indIiW + 1];              // input width
								            const int iC = inputShapeInfo[indIOioC + 1];            // input channels
								            const int oC = weightsShapeInfo[indWoC + 1];            // output channels

								            int trueoH, trueoW;                                     // true output height, width
								            ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, 1, kW, 1, sW, 0, pW, 1, dW, 1, iW, paddingMode);

								            std::vector<Nd4jLong> expectedGradOShape   = ShapeUtils::composeShapeUsingDimsAndIdx({bS, oC, trueoW, 0, indIOioC, indIiW});
								            std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
								            REQUIRE_TRUE(ShapeUtils::areShapesEqual(gradOShapeInfo, expectedGradOShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
								            REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
								            if (biasShapeInfo) {
								                REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM CONV1D_BP OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, static_cast<int>(biasShapeInfo[0]), static_cast<int>(shape::length(biasShapeInfo)));
								            }

								            // each gradient copies the shape of its forward counterpart and takes its data type from gradO
								            auto gradIshapeInfo = ShapeBuilders::copyShapeInfoAndType(inputShapeInfo,   gradOShapeInfo, false, block.getWorkspace());
								            auto gradWshapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, gradOShapeInfo, false, block.getWorkspace());

								            if (biasShapeInfo) {
								                auto gradBshapeInfo = ShapeBuilders::copyShapeInfoAndType(biasShapeInfo, gradOShapeInfo, false, block.getWorkspace());
								                return SHAPELIST(CONSTANT(gradIshapeInfo), CONSTANT(gradWshapeInfo), CONSTANT(gradBshapeInfo));
								            }

								            return SHAPELIST(CONSTANT(gradIshapeInfo), CONSTANT(gradWshapeInfo));
								        }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        // Registers which data types conv1d_bp accepts on each input slot and
								        // which it may produce on each output slot.
								        DECLARE_TYPES(conv1d_bp) {
								            // The original chained every setter off getOpDescriptor(); here the
								            // descriptor is named once and each slot is registered separately.
								            // Assumes each setter returns the descriptor it was called on (fluent
								            // interface), as the chained form implies.
								            auto descriptor = getOpDescriptor();

								            // Input 0: floating-point, integer and the two quantized integer types.
								            descriptor->setAllowedInputTypes(0, {ALL_FLOATS, ALL_INTS, DataType::QINT8, DataType::QINT16});

								            // Inputs 1..3: floating-point types only.
								            descriptor->setAllowedInputTypes(1, {ALL_FLOATS});
								            descriptor->setAllowedInputTypes(2, {ALL_FLOATS});
								            descriptor->setAllowedInputTypes(3, {ALL_FLOATS});

								            // Outputs 0 and 1: floating-point types only.
								            // NOTE(review): the conv1d_bp shape function can return a third shape
								            // (gradB) when a bias is supplied, but no type set is registered for
								            // output 2 here - confirm the descriptor's default handling covers it.
								            descriptor->setAllowedOutputTypes(0, {ALL_FLOATS});
								            descriptor->setAllowedOutputTypes(1, {ALL_FLOATS});
								        }
							 
						 
					
						
							
								
									
										
										
										
											2019-06-06 15:21:15 +03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif