/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// @author raver119@gmail.com
//
#ifndef LIBND4J_PLATFORM_MATH_H
#define LIBND4J_PLATFORM_MATH_H

#include <math.h>
#include <cmath>
#include <system/op_boilerplate.h>
#include <types/types.h>

#ifdef __CUDACC__
#include <types/float16.h>
#include <types/bfloat16.h>
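// Punning helper: two packed bfloat16 values aliased as a single 32-bit word,
// e.g. so a pair of 16-bit lanes can be read or updated in one word-wide
// operation on the device.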
union BPAIR {
    struct {
        bfloat16 H;
        bfloat16 L;
    } B;
    int W;

    __host__ __device__
    BPAIR() {}

    __host__ __device__
    ~BPAIR() {}
};

#define math_def __host__ __device__
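// math_def marks the helpers below as callable from both host and device code
// when compiling with nvcc; in plain CPU builds it expands to nothing (see the
// #else branch further down).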
#if CUDA_VERSION_MAJOR == 8
typedef union {
    struct {
        half H;
        half L;
    } B;
    int W;
} PAIR;
#else
struct HALFS {
    half H;
    half L;

    __host__ __device__
    HALFS() {}

    __host__ __device__
    ~HALFS() {}
};

union PAIR {
    HALFS B;
    int W;

    __host__ __device__
    PAIR() {}

    __host__ __device__
    ~PAIR() {}
};
#endif  // cuda_9
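// Illustrative sketch (not part of this header): PAIR allows CAS-style atomic
// updates of packed half values by aliasing them as one 32-bit word.
// Hypothetical device-side loop, assuming `address` is 4-byte aligned:
//
//   int* word = reinterpret_cast<int*>(address);
//   PAIR assumed, fresh, old;
//   old.W = *word;
//   do {
//     assumed.W = old.W;
//     fresh.W = assumed.W;
//     fresh.B.H = fresh.B.H + delta;  // update one packed half lane
//     old.W = atomicCAS(word, assumed.W, fresh.W);
//   } while (old.W != assumed.W);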
#else
#define math_def
#include <types/float16.h>
#endif

namespace sd {
namespace math {
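// Platform-level scalar math: force-inlined wrappers that dispatch each type
// to the matching libm call or CUDA intrinsic via full template specialization.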
template <typename T>
math_def FORCEINLINE T p_exp(T value);

template <typename T>
math_def FORCEINLINE T p_log(T value);

template <typename T>
math_def FORCEINLINE T p_floor(T value);

template <typename T>
math_def FORCEINLINE T p_ceil(T value);

template <typename T>
math_def FORCEINLINE T p_round(T value);

template <typename T>
math_def FORCEINLINE T p_cos(T value);

template <typename T>
math_def FORCEINLINE T p_cosh(T value);

template <typename T>
math_def FORCEINLINE T p_acos(T value);

template <typename T>
math_def FORCEINLINE T p_acosh(T value);

template <typename T>
math_def FORCEINLINE T p_sin(T value);

template <typename T>
math_def FORCEINLINE T p_sinh(T value);

template <typename T>
math_def FORCEINLINE T p_asin(T value);

template <typename T>
math_def FORCEINLINE T p_sqrt(T value);

template <typename T>
math_def FORCEINLINE T p_tanh(T value);

template <typename T>
math_def FORCEINLINE T p_erf(T value);

template <typename T>
math_def FORCEINLINE T p_erfc(T value);

template <typename T>
math_def FORCEINLINE T p_atan(T value);

template <typename T>
math_def FORCEINLINE T p_tan(T value);

template <typename T>
math_def FORCEINLINE T p_atanh(T value);

template <typename T>
math_def FORCEINLINE T p_rint(T value);

template <typename T>
math_def FORCEINLINE T p_rotl(T value, T shift);

template <typename T>
math_def FORCEINLINE T p_rotr(T value, T shift);

template <typename T>
math_def FORCEINLINE T p_remainder(T val1, T val2);

template <typename T>
math_def FORCEINLINE T p_fmod(T val1, T val2);

template <typename T>
math_def FORCEINLINE T p_pow(T value, T power);

template <typename T>
math_def FORCEINLINE T p_atan2(T val1, T val2);

//////
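// The specializations below bind each type to the precision-appropriate call
// (expf for float, exp for double, and so on). float16/bfloat16 inputs are
// promoted to float and cast back, unless NATIVE_HALFS routes them to a native
// half intrinsic (hexp, hlog, hfloor); the trailing generic template likewise
// computes through float. Illustrative calls:
//
//   float  a = sd::math::p_exp<float>(2.0f);  // -> expf(2.0f)
//   double b = sd::math::p_exp<double>(2.0);  // -> exp(2.0)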
template <>
math_def FORCEINLINE float p_exp(float value) {
    return expf(value);
}

template <>
math_def FORCEINLINE float16 p_exp(float16 val) {
#ifdef NATIVE_HALFS
    return hexp(val.data);
#else
    return static_cast<float16>(expf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_exp(bfloat16 val) {
    return static_cast<bfloat16>(expf((float) val));
}

template <>
math_def FORCEINLINE double p_exp(double value) {
    return exp(value);
}

template <typename T>
math_def FORCEINLINE T p_exp(T value) {
    return static_cast<T>(expf(static_cast<float>(value)));
}

/////////
template <>
math_def FORCEINLINE float16 p_pow(float16 value, float16 power) {
    return static_cast<float16>(powf(static_cast<float>(value), static_cast<float>(power)));
}

template <>
math_def FORCEINLINE bfloat16 p_pow(bfloat16 value, bfloat16 power) {
    return static_cast<bfloat16>(powf(static_cast<float>(value), static_cast<float>(power)));
}

template <>
math_def FORCEINLINE float p_pow(float value, float power) {
    return powf(value, power);
}

template <>
math_def FORCEINLINE double p_pow(double value, double power) {
    return pow(value, power);
}

template <typename T>
math_def FORCEINLINE T p_pow(T value, T power) {
    return static_cast<T>(powf(static_cast<float>(value), static_cast<float>(power)));
}

/////////
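// p_fmod follows C fmod semantics: the remainder of truncated division, with
// the result taking the sign of the first argument.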
template <>
|
|
|
|
math_def FORCEINLINE float16 p_fmod(float16 value, float16 power) {
|
|
|
|
return static_cast<float16>(fmodf(static_cast<float>(value), static_cast<float>(power)));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
math_def FORCEINLINE bfloat16 p_fmod(bfloat16 value, bfloat16 power) {
|
|
|
|
return static_cast<bfloat16>(fmodf(static_cast<float>(value), static_cast<float>(power)));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
math_def FORCEINLINE float p_fmod(float value, float power) {
|
|
|
|
return fmodf(value, power);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
math_def FORCEINLINE double p_fmod(double value, double power) {
|
|
|
|
return fmod(value, power);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
math_def FORCEINLINE T p_fmod(T value, T power) {
|
|
|
|
return static_cast<T>(fmodf(static_cast<float>(value), static_cast<float>(power)));
|
|
|
|
}
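
// Note: like std::fmod, the result keeps the sign of the first operand:
//
//     p_fmod<float>( 5.5f, 2.0f);   // ->  1.5f
//     p_fmod<float>(-5.5f, 2.0f);   // -> -1.5f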

/////////

template <>
math_def FORCEINLINE float16 p_atan2(float16 y, float16 x) {
    return static_cast<float16>(atan2f(static_cast<float>(y), static_cast<float>(x)));
}

template <>
math_def FORCEINLINE float p_atan2(float y, float x) {
    return atan2f(y, x);
}

template <>
math_def FORCEINLINE double p_atan2(double y, double x) {
    return atan2(y, x);
}

template <typename T>
math_def FORCEINLINE T p_atan2(T y, T x) {
    return static_cast<T>(atan2f(static_cast<float>(y), static_cast<float>(x)));
}
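
// Note: argument order follows std::atan2 - the ordinate (y) comes first:
//
//     float q = p_atan2<float>(1.0f, 1.0f);   // pi / 4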

/////////

template <>
math_def FORCEINLINE float16 p_remainder(float16 value, float16 denom) {
    return static_cast<float16>(remainderf(static_cast<float>(value), static_cast<float>(denom)));
}

template <>
math_def FORCEINLINE float p_remainder(float value, float denom) {
    return remainderf(value, denom);
}

template <>
math_def FORCEINLINE double p_remainder(double value, double denom) {
    return remainder(value, denom);
}

template <typename T>
math_def FORCEINLINE T p_remainder(T value, T denom) {
    return static_cast<T>(remainderf(static_cast<float>(value), static_cast<float>(denom)));
}
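
// Unlike p_fmod, p_remainder rounds the quotient to the nearest integer
// (IEEE remainder), so the result can be negative for positive inputs:
//
//     p_remainder<float>(5.5f, 2.0f);   // -> -0.5f (quotient rounded to 3)
//     p_fmod<float>(5.5f, 2.0f);        // ->  1.5f (quotient truncated to 2)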

/////////

template <>
math_def FORCEINLINE float p_log(float value) {
    return logf(value);
}

template <>
math_def FORCEINLINE float16 p_log(float16 val) {
#ifdef NATIVE_HALFS
    return hlog(val.data);
#else
    return static_cast<float16>(logf((float) val));
#endif
}

template <>
math_def FORCEINLINE double p_log(double value) {
    return log(value);
}

template <typename T>
math_def FORCEINLINE T p_log(T value) {
    return static_cast<T>(logf(static_cast<float>(value)));
}
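
// Caveat: for integral T the generic p_log truncates toward zero on the way
// back, e.g. p_log<int>(10) yields 2 (logf(10) ~ 2.3026 cast to int).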

/////////

template <>
math_def FORCEINLINE float p_floor(float value) {
    return floorf(value);
}

template <>
math_def FORCEINLINE float16 p_floor(float16 val) {
#ifdef NATIVE_HALFS
    return hfloor(val.data);
#else
    return static_cast<float16>(floorf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_floor(bfloat16 value) {
    return static_cast<bfloat16>(floorf((float)value));
}

template <>
math_def FORCEINLINE double p_floor(double value) {
    return floor(value);
}

template <typename T>
math_def FORCEINLINE T p_floor(T value) {
    return value;
}
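
// For every type without a specialization above (i.e. the integral types)
// p_floor is the identity; p_ceil, p_round and p_rint below follow the same
// convention.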

/////////

template <>
math_def FORCEINLINE float p_ceil(float value) {
    return ceilf(value);
}

template <>
math_def FORCEINLINE float16 p_ceil(float16 val) {
#ifdef NATIVE_HALFS
    return hceil(val.data);
#else
    return static_cast<float16>(ceilf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_ceil(bfloat16 value) {
    return static_cast<bfloat16>(ceilf((float)value));
}

template <>
math_def FORCEINLINE double p_ceil(double value) {
    return ceil(value);
}

template <typename T>
math_def FORCEINLINE T p_ceil(T value) {
    return value;
}

/////////

template <>
math_def FORCEINLINE float p_round(float value) {
    return roundf(value);
}

template <>
math_def FORCEINLINE float16 p_round(float16 val) {
    return static_cast<float16>(roundf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_round(bfloat16 value) {
    return static_cast<bfloat16>(roundf((float)value));
}

template <>
math_def FORCEINLINE double p_round(double value) {
    return round(value);
}

template <typename T>
math_def FORCEINLINE T p_round(T value) {
    return value;
}

/////////

template <>
math_def FORCEINLINE float p_rint(float value) {
    return rintf(value);
}

template <>
math_def FORCEINLINE float16 p_rint(float16 val) {
#ifdef NATIVE_HALFS
    return hrint(val.data);
#else
    return static_cast<float16>(rintf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_rint(bfloat16 val) {
    return static_cast<bfloat16>(rintf((float) val));
}

template <>
math_def FORCEINLINE double p_rint(double value) {
    return rint(value);
}

template <typename T>
math_def FORCEINLINE T p_rint(T value) {
    return value;
}
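
// Note the difference between the two rounding families: p_round (roundf)
// rounds halfway cases away from zero, while p_rint (rintf) honours the
// current FP rounding mode, round-half-to-even by default:
//
//     p_round<float>(2.5f);   // -> 3.0f
//     p_rint<float>(2.5f);    // -> 2.0f (default rounding mode)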

/////////

template <>
math_def FORCEINLINE float p_cos(float value) {
    return cosf(value);
}

template <>
math_def FORCEINLINE float16 p_cos(float16 val) {
#ifdef NATIVE_HALFS
    return hcos(val.data);
#else
    return static_cast<float16>(cosf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_cos(bfloat16 val) {
    return static_cast<bfloat16>(cosf((float) val));
}

template <>
math_def FORCEINLINE double p_cos(double value) {
    return cos(value);
}

/////////

template <>
math_def FORCEINLINE float p_sin(float value) {
    return sinf(value);
}

template <>
math_def FORCEINLINE float16 p_sin(float16 val) {
#ifdef NATIVE_HALFS
    return hsin(val.data);
#else
    return static_cast<float16>(sinf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_sin(bfloat16 val) {
    return static_cast<bfloat16>(sinf((float) val));
}

template <>
math_def FORCEINLINE double p_sin(double value) {
    return sin(value);
}

/////////

template <>
math_def FORCEINLINE float p_sqrt(float value) {
    return sqrtf(value);
}

template <>
math_def FORCEINLINE float16 p_sqrt(float16 val) {
#ifdef NATIVE_HALFS
    return hsqrt(val.data);
#else
    return static_cast<float16>(sqrtf((float) val));
#endif
}

template <>
math_def FORCEINLINE bfloat16 p_sqrt(bfloat16 val) {
    return static_cast<bfloat16>(sqrtf((float) val));
}

template <>
math_def FORCEINLINE double p_sqrt(double value) {
    return sqrt(value);
}

/////////

template <>
math_def FORCEINLINE float p_tanh(float value) {
    return tanhf(value);
}

template <>
math_def FORCEINLINE float16 p_tanh(float16 val) {
    return static_cast<float16>(tanhf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_tanh(bfloat16 val) {
    return static_cast<bfloat16>(tanhf((float) val));
}

template <>
math_def FORCEINLINE double p_tanh(double value) {
    return tanh(value);
}

/////////

template <>
math_def FORCEINLINE float p_erf(float value) {
    return erff(value);
}

template <>
math_def FORCEINLINE float16 p_erf(float16 val) {
    return static_cast<float16>(erff((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_erf(bfloat16 val) {
    return static_cast<bfloat16>(erff((float) val));
}

template <>
math_def FORCEINLINE double p_erf(double value) {
    return erf(value);
}

/////////

template <>
math_def FORCEINLINE float p_erfc(float value) {
    return erfcf(value);
}

template <>
math_def FORCEINLINE float16 p_erfc(float16 val) {
    return static_cast<float16>(erfcf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_erfc(bfloat16 val) {
    return static_cast<bfloat16>(erfcf((float) val));
}

template <>
math_def FORCEINLINE double p_erfc(double value) {
    return erfc(value);
}

/////////

template <>
math_def FORCEINLINE float p_acos(float value) {
    return acosf(value);
}

template <>
math_def FORCEINLINE float16 p_acos(float16 val) {
    return static_cast<float16>(acosf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_acos(bfloat16 val) {
    return static_cast<bfloat16>(acosf((float) val));
}

template <>
math_def FORCEINLINE double p_acos(double value) {
    return acos(value);
}

/////////

template <>
math_def FORCEINLINE float p_sinh(float value) {
    return sinhf(value);
}

template <>
math_def FORCEINLINE float16 p_sinh(float16 val) {
    return static_cast<float16>(sinhf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_sinh(bfloat16 val) {
    return static_cast<bfloat16>(sinhf((float) val));
}

template <>
math_def FORCEINLINE double p_sinh(double value) {
    return sinh(value);
}

/////////

template <>
math_def FORCEINLINE float p_acosh(float value) {
    return acoshf(value);
}

template <>
math_def FORCEINLINE float16 p_acosh(float16 val) {
    return static_cast<float16>(acoshf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_acosh(bfloat16 val) {
    return static_cast<bfloat16>(acoshf((float) val));
}

template <>
math_def FORCEINLINE double p_acosh(double value) {
    return acosh(value);
}

/////////

template <>
math_def FORCEINLINE float p_cosh(float value) {
    return coshf(value);
}

template <>
math_def FORCEINLINE float16 p_cosh(float16 val) {
    return static_cast<float16>(coshf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_cosh(bfloat16 val) {
    return static_cast<bfloat16>(coshf((float) val));
}

template <>
math_def FORCEINLINE double p_cosh(double value) {
    return cosh(value);
}

/////////

template <>
math_def FORCEINLINE float p_asin(float value) {
    return asinf(value);
}

template <>
math_def FORCEINLINE float16 p_asin(float16 val) {
    return static_cast<float16>(asinf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_asin(bfloat16 val) {
    return static_cast<bfloat16>(asinf((float) val));
}

template <>
math_def FORCEINLINE double p_asin(double value) {
    return asin(value);
}

/////////

template <>
math_def FORCEINLINE float p_atan(float value) {
    return atanf(value);
}

template <>
math_def FORCEINLINE float16 p_atan(float16 val) {
    return static_cast<float16>(atanf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_atan(bfloat16 val) {
    return static_cast<bfloat16>(atanf((float) val));
}

template <>
math_def FORCEINLINE double p_atan(double value) {
    return atan(value);
}

/////////

template <>
math_def FORCEINLINE float p_tan(float value) {
    return tanf(value);
}

template <>
math_def FORCEINLINE float16 p_tan(float16 val) {
    return static_cast<float16>(tanf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_tan(bfloat16 val) {
    return static_cast<bfloat16>(tanf((float) val));
}

template <>
math_def FORCEINLINE double p_tan(double value) {
    return tan(value);
}

/////////

template <>
math_def FORCEINLINE float p_atanh(float value) {
    return atanhf(value);
}

template <>
math_def FORCEINLINE float16 p_atanh(float16 val) {
    return static_cast<float16>(atanhf((float) val));
}

template <>
math_def FORCEINLINE bfloat16 p_atanh(bfloat16 val) {
    return static_cast<bfloat16>(atanhf((float) val));
}

template <>
math_def FORCEINLINE double p_atanh(double value) {
    return atanh(value);
}

/////////

template <typename T>
math_def FORCEINLINE T _rotate_left(T value, T shift);

template <typename T>
math_def FORCEINLINE T _rotate_right(T value, T shift);

template <>
math_def FORCEINLINE int8_t _rotate_left(int8_t value, int8_t shift) {
    return value << shift | value >> (8 - shift);
}

template <>
math_def FORCEINLINE int8_t _rotate_right(int8_t value, int8_t shift) {
    return value >> shift | value << (8 - shift);
}

template <>
math_def FORCEINLINE uint8_t _rotate_left(uint8_t value, uint8_t shift) {
    return value << shift | value >> (8 - shift);
}

template <>
math_def FORCEINLINE uint8_t _rotate_right(uint8_t value, uint8_t shift) {
    return value >> shift | value << (8 - shift);
}

template <>
math_def FORCEINLINE int16_t _rotate_left(int16_t value, int16_t shift) {
    return value << shift | value >> (16 - shift);
}

template <>
math_def FORCEINLINE int16_t _rotate_right(int16_t value, int16_t shift) {
    return value >> shift | value << (16 - shift);
}

template <>
math_def FORCEINLINE uint16_t _rotate_left(uint16_t value, uint16_t shift) {
    return value << shift | value >> (16 - shift);
}

template <>
math_def FORCEINLINE uint16_t _rotate_right(uint16_t value, uint16_t shift) {
    return value >> shift | value << (16 - shift);
}

template <>
math_def FORCEINLINE int _rotate_left(int value, int shift) {
    return value << shift | value >> (32 - shift);
}

template <>
math_def FORCEINLINE int _rotate_right(int value, int shift) {
    return value >> shift | value << (32 - shift);
}

template <>
math_def FORCEINLINE uint32_t _rotate_left(uint32_t value, uint32_t shift) {
    return value << shift | value >> (32 - shift);
}

template <>
math_def FORCEINLINE uint32_t _rotate_right(uint32_t value, uint32_t shift) {
    return value >> shift | value << (32 - shift);
}

template <>
math_def FORCEINLINE Nd4jLong _rotate_left(Nd4jLong value, Nd4jLong shift) {
    return value << shift | value >> (64 - shift);
}

template <>
math_def FORCEINLINE Nd4jLong _rotate_right(Nd4jLong value, Nd4jLong shift) {
    return value >> shift | value << (64 - shift);
}

template <>
math_def FORCEINLINE uint64_t _rotate_left(uint64_t value, uint64_t shift) {
#ifdef SD_ARM_BUILD
    // workaround: on ARM builds gcc miscompiles the unsigned 64-bit rotate,
    // so route through the signed Nd4jLong overload instead
    // TODO: eventually remove this once gcc fixes the bug
    Nd4jLong val = _rotate_left<Nd4jLong>(*reinterpret_cast<Nd4jLong *>(&value), *reinterpret_cast<Nd4jLong *>(&shift));
    return *reinterpret_cast<uint64_t *>(&val);
#else
    return value << shift | value >> (64 - shift);
#endif
}

template <>
math_def FORCEINLINE uint64_t _rotate_right(uint64_t value, uint64_t shift) {
#ifdef SD_ARM_BUILD
    // workaround: on ARM builds gcc miscompiles the unsigned 64-bit rotate,
    // so route through the signed Nd4jLong overload instead
    // TODO: eventually remove this once gcc fixes the bug
    Nd4jLong val = _rotate_right<Nd4jLong>(*reinterpret_cast<Nd4jLong *>(&value), *reinterpret_cast<Nd4jLong *>(&shift));
    return *reinterpret_cast<uint64_t *>(&val);
#else
    return value >> shift | value << (64 - shift);
#endif
}

template <typename T>
math_def FORCEINLINE T p_rotl(T value, T shift) {
    return _rotate_left<T>(value, shift);
}

template <typename T>
math_def FORCEINLINE T p_rotr(T value, T shift) {
    return _rotate_right<T>(value, shift);
}
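
// Usage sketch for the rotation helpers. Callers must keep shift strictly
// between 0 and the bit width of T: a shift of 0 or the full width makes the
// (width - shift) term shift by >= width, which is undefined behaviour in
// C++. For signed T, value >> shift is an arithmetic shift, so rotating a
// negative value smears the sign bit into the result.
//
//     uint32_t l = p_rotl<uint32_t>(0x80000001u, 1u);   // -> 0x00000003
//     uint32_t r = p_rotr<uint32_t>(0x00000003u, 1u);   // -> 0x80000001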

}
}

#endif //LIBND4J_PLATFORM_MATH_H