cavis/libnd4j/include/math/platformmath.h

875 lines
23 KiB
C
Raw Normal View History

2019-06-06 14:21:15 +02:00
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#ifndef LIBND4J_PLATFORM_MATH_H
#define LIBND4J_PLATFORM_MATH_H
#include <math.h>
#include <cmath>
#include <system/op_boilerplate.h>
2019-06-06 14:21:15 +02:00
#include <types/types.h>
#ifdef __CUDACC__
#include <types/float16.h>
#include <types/bfloat16.h>
/**
 * Union that overlays a pair of bfloat16 values (B.H, B.L) with a single
 * int (W), so both views share the same storage.
 *
 * NOTE(review): presumably used to move two bfloat16 values around as one
 * machine word; this assumes sizeof(bfloat16) * 2 == sizeof(int) -- confirm.
 */
union BPAIR {
    // Pair view: H is declared first, L second.
    struct {
        bfloat16 H;
        bfloat16 L;
    } B;

    // Integer view sharing storage with B.
    int W;

    // Empty, user-provided constructor; it initializes neither B nor W.
    // NOTE(review): likely spelled out by hand because bfloat16 has
    // non-trivial special members, which would otherwise leave the union's
    // implicit ones deleted -- confirm against types/bfloat16.h.
    __host__ __device__ BPAIR() {}

    // Empty, user-provided destructor; callable from host and device code.
    __host__ __device__ ~BPAIR() {}
};
#define math_def __host__ __device__
Development updates (#9098) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... 
exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel 
<9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Fix L2NormalizeVertex and eclipse#9054 (#513) * update * Fix L2NormalizeVertex Fix eclipse#9054 * RL4J: Add async training and advantage actor-critic (#507) * Added async training & Advantage Actor Critic Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Fix compiler error Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Renamed ActorCriticPolicy back to ACPolicy Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Samuel Audet <samuel.audet@gmail.com> * Python GIL overhaul (#517) * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been 
merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? 
Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Removed dead code (#9057) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * performance improvement (#9055) * performance improvement Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * revert some changes Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL 
Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR * Update cherry pick again from last master revision. Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Ag pythongiloverhaul (#518) * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java 
test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com 
<raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Removed dead code (#9057) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * performance improvement (#9055) * performance improvement Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * revert some changes Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR * Update cherry pick again from last master revision. * Re update python4j Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Bump formatter-maven-plugin from 2.0.0 to 2.12.1 (#505) Bumps [formatter-maven-plugin](https://github.com/revelc/formatter-maven-plugin) from 2.0.0 to 2.12.1. 
- [Release notes](https://github.com/revelc/formatter-maven-plugin/releases) - [Changelog](https://github.com/revelc/formatter-maven-plugin/blob/formatter-maven-plugin-2.12.1/CHANGELOG.md) - [Commits](https://github.com/revelc/formatter-maven-plugin/compare/formatter-maven-plugin-2.0.0...formatter-maven-plugin-2.12.1) Signed-off-by: dependabot-preview[bot] <support@dependabot.com> Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> * Ag fix9060 (#519) * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in 
reduce_ ... exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel 
<9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Removed dead code (#9057) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * performance improvement (#9055) * performance improvement Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * revert some changes Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Added support for the archunit (#9062) * Added support for the archunit Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Updated pom files Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Datavec code cleaup (#9071) * removed unnecessary semicolons Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Use standard charset object Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Removed unused imports Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * WIP: Fix Conv1d causal case * Add inital tests * Update Conv1d tests to be a bit more robust * Remove redundant test * Reset from master * Remove cuda definition (left over) * Update rl4j again * Update pom.xml Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Fixes 9061 (#521) * Get rid of edge case in validation * Added 
support for the archunit (#9062) * Added support for the archunit Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Updated pom files Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Using embedded copying of an array instead of manual (#9073) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Datavec bulk operation (#9075) * Bulk operation can be used instead of iteration inspection Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Redundant 'Collection.addAll()' call inspection Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Removed infinitely loop (#9076) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Revert "Merge eclipse changes" (#526) * Revert rl4j to 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182 (#527) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii 
<iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? 
Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Fix L2NormalizeVertex and eclipse#9054 (#513) * update * Fix L2NormalizeVertex Fix eclipse#9054 * RL4J: Add async training and advantage actor-critic (#507) * Added async training & Advantage Actor Critic Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Fix compiler error Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Renamed ActorCriticPolicy back to ACPolicy Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Samuel Audet <samuel.audet@gmail.com> * Python 
GIL overhaul (#517) * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... 
exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel 
<9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Removed dead code (#9057) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * performance improvement (#9055) * performance improvement Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * revert some changes Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR * Update cherry pick again from last master revision. Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Ag pythongiloverhaul (#518) * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of 
default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? 
Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Removed dead code (#9057) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * performance improvement (#9055) * performance improvement Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * revert some changes Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL 
Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Cherry pick rl4j changes from most recent KonduitAI/deeplearning4j PR * Update cherry pick again from last master revision. * Re update python4j Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Bump formatter-maven-plugin from 2.0.0 to 2.12.1 (#505) Bumps [formatter-maven-plugin](https://github.com/revelc/formatter-maven-plugin) from 2.0.0 to 2.12.1. - [Release notes](https://github.com/revelc/formatter-maven-plugin/releases) - [Changelog](https://github.com/revelc/formatter-maven-plugin/blob/formatter-maven-plugin-2.12.1/CHANGELOG.md) - [Commits](https://github.com/revelc/formatter-maven-plugin/compare/formatter-maven-plugin-2.0.0...formatter-maven-plugin-2.12.1) Signed-off-by: dependabot-preview[bot] <support@dependabot.com> Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> * Ag fix9060 (#519) * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii 
<iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? 
Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Removed dead code (#9057) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * performance improvement (#9055) * performance improvement Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * revert some changes Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL 
Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Added support for the archunit (#9062) * Added support for the archunit Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Updated pom files Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Datavec code cleaup (#9071) * removed unnecessary semicolons Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Use standard charset object Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Removed unused imports Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * WIP: Fix Conv1d causal case * Add inital tests * Update Conv1d tests to be a bit more robust * Remove redundant test * Reset from master * Remove cuda definition (left over) * Update rl4j again * Update pom.xml Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * Fixes 9061 (#521) * Get rid of edge case in validation * Added support for the archunit (#9062) * Added support for the archunit Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Updated pom files Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Using embedded copying of an array instead of manual (#9073) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Datavec bulk operation (#9075) * Bulk operation can be used instead of iteration inspection Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Redundant 'Collection.addAll()' call inspection Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> * Removed infinitely loop (#9076) Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> * RL4J: Add async training and advantage actor-critic (#507) * Added async 
training & Advantage Actor Critic Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Fix compiler error Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Renamed ActorCriticPolicy back to ACPolicy Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Samuel Audet <samuel.audet@gmail.com> (cherry picked from commit 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182) * RL4J: Add async training and advantage actor-critic (#507) * Added async training & Advantage Actor Critic Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Fix compiler error Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Renamed ActorCriticPolicy back to ACPolicy Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Samuel Audet <samuel.audet@gmail.com> (cherry picked from commit 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182) * Revert rl4j to 72f5c18c830f62df2c04fbf8dc7b1353cc2d3182 * Delete jnind4jaurora.cpp Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> * RL4J: Add partial support for RNN (#514) * Added partial recurrent support Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Made sure the RNN always see the observation in EpsGreedy Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Converted all line endings of rl4j-core to LF (#530) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * NDJ4: Bundle configuration files required by AOT compilation with GraalVM (#529) * NDJ4: Bundle configuration files required by AOT compilation with GraalVM * Update dependencies to just released JavaCPP and 
JavaCV 1.5.4 * Ag fixtests 831 (#523) * Update UnderSamplingPreProcessorTest.java * Development updates (#9053) * RL4J: Add generic update rule (#502) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> * Shyrma reduce (#481) * - start working on improving of cpu legacy code for reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving legacy loops Signed-off-by: Yurii <iuriish@yahoo.com> * - still working on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - further work on improving reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - testing speed run of new reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - working on improvement of default loop for reduce op Signed-off-by: Yurii <iuriish@yahoo.com> * - update signatures of stuff which calls reduce ops Signed-off-by: Yurii <iuriish@yahoo.com> * - make corrections in cuda reduce kernels Signed-off-by: Yurii <iuriish@yahoo.com> * - change loop for default case in broadcast legacy ops Signed-off-by: Yurii <iuriish@yahoo.com> * - comment some shape stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - comment unnecessary prints in RNGtests Signed-off-by: Yurii <iuriish@yahoo.com> * - finish to resolve conflicts after master has been merged Signed-off-by: Yurii <iuriish@yahoo.com> * - get rid of some compilation mistakes of cuda stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor changes Signed-off-by: Yurii <iuriish@yahoo.com> * - further search for bug causing crash on java test Signed-off-by: Yurii <iuriish@yahoo.com> * - add scalar case in reduce_ ... 
exec stuff Signed-off-by: Yurii <iuriish@yahoo.com> * - minor corrections in NAtiveOps.cu Signed-off-by: Yurii <iuriish@yahoo.com> * - add switch to scalar case execReduceXD functions Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in ConstantShapeHelper::createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> * - correct cuda mirrorPad Signed-off-by: Yurii <iuriish@yahoo.com> * - add support for vectors old shape in cuda createShapeInfoWithNoUnitiesForReduce Signed-off-by: Yurii <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> * Add support for CUDA 11.0 (#492) * Add support for CUDA 11.0 * libnd4j tweaks for CUDA 11 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * bindings update, again? Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update versions of JavaCPP Presets for FFmpeg, OpenBLAS, and NumPy * update API to match CUDA 8 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * * Update version of JavaCPP Presets for CPython * C++ updated for cuDNN 8.0 Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one more test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * 128-bit alignment for workspaces Signed-off-by: raver119@gmail.com <raver119@gmail.com> * change seed in 1 test Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Fix dependecy duplication in python4j-parent pom * Fix group id for in python4j-numpy * few tests tweaked Signed-off-by: raver119@gmail.com <raver119@gmail.com> * Remove macosx-x86_64-gpu from nd4j-tests-tensorflow * few minor tweaks for IndexReduce Signed-off-by: raver119@gmail.com <raver119@gmail.com> * one test removed Signed-off-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: raver119@gmail.com <raver119@gmail.com> Co-authored-by: Serhii Shepel 
<9946053+sshepel@users.noreply.github.com> * RL4J: Add SyncTrainer and AgentLearnerBuilder for a few algorithms (#504) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * Development updates (#9064) * Update versions of JavaCPP Presets for OpenCV, FFmpeg, and MKL Signed-off-by: Samuel Audet <samuel.audet@gmail.com> * Add proper annotation * Fix classcast exception for recurrent model import case * Update keras import to allow for proper handling of changing NCHW -> NHWC mid later * Add output to test to ensure proper activation * Fixes computation graphs to allow dimension ordering to change mid graph * Add NHWC support for keras import. * Update tests to pass /ignore out of date ones * Add multi RNNDataformat support * Update tests to make more pass. Updates some tests to be correct, double checked existing models and updated reasons they may or may not fail. * Add back old default values to ensure legacy serialization works. Replace null value default with sentinel value for default value overridden. 
* Update layers to preserve changed values * Exclude default value over ridden from comparison * Fix conv1d import (no permute weights anymore) * Update KerasConvolution1D.java Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> * GPU compute capability (#532) * - GPU cpu capability flags - CUDA MAJOR VERSION provided by cmake Signed-off-by: AbdelRauf <rauf@konduit.ai> * Readme Signed-off-by: AbdelRauf <rauf@konduit.ai> * Readme Signed-off-by: AbdelRauf <rauf@konduit.ai> * RL4J: Add new network implementation to help support recurrent networks (#531) Signed-off-by: Alexandre Boulanger <aboulang2002@yahoo.com> Co-authored-by: Alexandre Boulanger <44292157+aboulang2002@users.noreply.github.com> Co-authored-by: Yurii Shyrma <iuriish@yahoo.com> Co-authored-by: raver119 <raver119@gmail.com> Co-authored-by: Samuel Audet <samuel.audet@gmail.com> Co-authored-by: Serhii Shepel <9946053+sshepel@users.noreply.github.com> Co-authored-by: dariuszzbyrad <dariusz.zbyrad@gmail.com> Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> Co-authored-by: Abdelrauf <qwr@live.ru>
2020-09-23 12:11:29 +02:00
#if CUDA_VERSION_MAJOR == 8
2019-06-06 14:21:15 +02:00
// PAIR variant selected when CUDA_VERSION_MAJOR == 8.
// A union that overlays two `half` values (B.H followed by B.L) with a
// single `int` (W), so the same storage can be accessed either way.
// NOTE(review): presumably used to update two halves through one int-sized
// access — confirm against the callers.
typedef union {
struct {
half H;
half L;
} B;
int W;
} PAIR;
#else
// Two `half` values stored one after the other: H first, then L.
// The constructor and destructor are user-provided (empty) and marked
// __host__ __device__ so they can be called from both host and device code.
struct HALFS {
    half H;
    half L;

    __host__ __device__ HALFS() {}
    __host__ __device__ ~HALFS() {}
};
// Union that overlays a HALFS pair (B) with a single int (W).
// Because HALFS has user-provided special members, the union supplies its
// own empty constructor and destructor, callable from host and device.
union PAIR {
    HALFS B;
    int W;

    __host__ __device__ PAIR() {}
    __host__ __device__ ~PAIR() {}
};
#endif // cuda_9
#else
#define math_def
#include <types/float16.h>
2019-06-06 14:21:15 +02:00
#endif
namespace sd {
2019-06-06 14:21:15 +02:00
namespace math {
// Forward declarations of the platform math primitives.
// Each primitive is a function template over the element type T that takes
// and returns T. `math_def` expands to `__host__ __device__` under CUDA and
// to nothing otherwise.

// Exponential and logarithm.
template <typename T>
math_def FORCEINLINE T p_exp(T value);
template <typename T>
math_def FORCEINLINE T p_log(T value);
// Rounding family.
template <typename T>
math_def FORCEINLINE T p_floor(T value);
template <typename T>
math_def FORCEINLINE T p_ceil(T value);
template <typename T>
math_def FORCEINLINE T p_round(T value);
// Trigonometric / hyperbolic functions and their inverses.
template <typename T>
math_def FORCEINLINE T p_cos(T value);
template <typename T>
math_def FORCEINLINE T p_cosh(T value);
template <typename T>
math_def FORCEINLINE T p_acos(T value);
template <typename T>
math_def FORCEINLINE T p_acosh(T value);
template <typename T>
math_def FORCEINLINE T p_sin(T value);
template <typename T>
math_def FORCEINLINE T p_sinh(T value);
template <typename T>
math_def FORCEINLINE T p_asin(T value);
// Square root.
template <typename T>
math_def FORCEINLINE T p_sqrt(T value);
template <typename T>
math_def FORCEINLINE T p_tanh(T value);
// Error function and complementary error function.
template <typename T>
math_def FORCEINLINE T p_erf(T value);
template <typename T>
math_def FORCEINLINE T p_erfc(T value);
template <typename T>
math_def FORCEINLINE T p_atan(T value);
template <typename T>
math_def FORCEINLINE T p_tan(T value);
template <typename T>
math_def FORCEINLINE T p_atanh(T value);
// Round to integral value (see the rintf/rint based specializations).
template <typename T>
math_def FORCEINLINE T p_rint(T value);
// Two-argument rotate primitives taking a value and a shift amount.
// NOTE(review): presumably bitwise rotate left/right — their definitions are
// not next to these declarations, confirm there.
template <typename T>
math_def FORCEINLINE T p_rotl(T value, T shift);
template <typename T>
math_def FORCEINLINE T p_rotr(T value, T shift);
2019-06-06 14:21:15 +02:00
// Forward declarations of the two-argument math primitives. Both arguments
// and the result share the element type T.

// Remainder of val1 / val2 (specializations call remainderf/remainder).
template <typename T>
math_def FORCEINLINE T p_remainder(T val1, T val2);
// Floating-point modulo (specializations call fmodf/fmod).
template <typename T>
math_def FORCEINLINE T p_fmod(T val1, T val2);
// `value` raised to `power` (specializations call powf/pow).
template <typename T>
math_def FORCEINLINE T p_pow(T value, T power);
// Two-argument arctangent (specializations call atan2f/atan2).
template <typename T>
math_def FORCEINLINE T p_atan2(T val1, T val2);
//////
// p_exp: exponential function, specialized per element type.

// float: direct call to expf.
template <>
math_def FORCEINLINE float p_exp(float value) {
    return expf(value);
}

// float16: uses hexp on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates expf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_exp(float16 val) {
#ifdef NATIVE_HALFS
    return hexp(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(expf(asFloat));
#endif
}

// bfloat16: evaluated in float via expf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_exp(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(expf(asFloat));
}

// double: direct call to exp.
template <>
math_def FORCEINLINE double p_exp(double value) {
    return exp(value);
}

// Any other T: converted to float, passed through expf, converted back to T.
template <typename T>
math_def FORCEINLINE T p_exp(T value) {
    const float asFloat = static_cast<float>(value);
    return static_cast<T>(expf(asFloat));
}
/////////
// p_pow: raises `value` to `power`, specialized per element type.

// float16: both operands are widened to float, powf is applied, and the
// result is converted back.
template <>
math_def FORCEINLINE float16 p_pow(float16 value, float16 power) {
    const float base = static_cast<float>(value);
    const float exponent = static_cast<float>(power);
    return static_cast<float16>(powf(base, exponent));
}

// bfloat16: same float round trip as float16.
template <>
math_def FORCEINLINE bfloat16 p_pow(bfloat16 value, bfloat16 power) {
    const float base = static_cast<float>(value);
    const float exponent = static_cast<float>(power);
    return static_cast<bfloat16>(powf(base, exponent));
}

// float: direct call to powf.
template <>
math_def FORCEINLINE float p_pow(float value, float power) {
    return powf(value, power);
}

// double: direct call to pow.
template <>
math_def FORCEINLINE double p_pow(double value, double power) {
    return pow(value, power);
}

// Any other T: computed in float via powf and converted back to T.
template <typename T>
math_def FORCEINLINE T p_pow(T value, T power) {
    const float base = static_cast<float>(value);
    const float exponent = static_cast<float>(power);
    return static_cast<T>(powf(base, exponent));
}
/////////
// p_fmod: floating-point modulo of `value` by `power` (the second parameter
// keeps its historical name), specialized per element type.

// float16: widened to float, fmodf applied, converted back.
template <>
math_def FORCEINLINE float16 p_fmod(float16 value, float16 power) {
    const float dividend = static_cast<float>(value);
    const float divisor = static_cast<float>(power);
    return static_cast<float16>(fmodf(dividend, divisor));
}

// bfloat16: same float round trip as float16.
template <>
math_def FORCEINLINE bfloat16 p_fmod(bfloat16 value, bfloat16 power) {
    const float dividend = static_cast<float>(value);
    const float divisor = static_cast<float>(power);
    return static_cast<bfloat16>(fmodf(dividend, divisor));
}

// float: direct call to fmodf.
template <>
math_def FORCEINLINE float p_fmod(float value, float power) {
    return fmodf(value, power);
}

// double: direct call to fmod.
template <>
math_def FORCEINLINE double p_fmod(double value, double power) {
    return fmod(value, power);
}

// Any other T: computed in float via fmodf and converted back to T.
template <typename T>
math_def FORCEINLINE T p_fmod(T value, T power) {
    const float dividend = static_cast<float>(value);
    const float divisor = static_cast<float>(power);
    return static_cast<T>(fmodf(dividend, divisor));
}
/////////
// p_atan2: two-argument arctangent; `value` is passed as the first argument
// of atan2f/atan2 and `power` as the second.

// float16: widened to float, atan2f applied, converted back.
template <>
math_def FORCEINLINE float16 p_atan2(float16 value, float16 power) {
    const float first = static_cast<float>(value);
    const float second = static_cast<float>(power);
    return static_cast<float16>(atan2f(first, second));
}

// float: direct call to atan2f.
template <>
math_def FORCEINLINE float p_atan2(float value, float power) {
    return atan2f(value, power);
}

// double: direct call to atan2.
template <>
math_def FORCEINLINE double p_atan2(double value, double power) {
    return atan2(value, power);
}

// Any other T: computed in float via atan2f and converted back to T.
template <typename T>
math_def FORCEINLINE T p_atan2(T value, T power) {
    const float first = static_cast<float>(value);
    const float second = static_cast<float>(power);
    return static_cast<T>(atan2f(first, second));
}
/////////
// p_remainder: remainder of `value` divided by `power`, as computed by
// remainderf/remainder.

// float16: widened to float, remainderf applied, converted back.
template <>
math_def FORCEINLINE float16 p_remainder(float16 value, float16 power) {
    const float dividend = static_cast<float>(value);
    const float divisor = static_cast<float>(power);
    return static_cast<float16>(remainderf(dividend, divisor));
}

// float: direct call to remainderf.
template <>
math_def FORCEINLINE float p_remainder(float value, float power) {
    return remainderf(value, power);
}

// double: direct call to remainder.
template <>
math_def FORCEINLINE double p_remainder(double value, double power) {
    return remainder(value, power);
}

// Any other T: computed in float via remainderf and converted back to T.
template <typename T>
math_def FORCEINLINE T p_remainder(T value, T power) {
    const float dividend = static_cast<float>(value);
    const float divisor = static_cast<float>(power);
    return static_cast<T>(remainderf(dividend, divisor));
}
/////////
// p_log: natural logarithm, specialized per element type.

// float: direct call to logf.
template <>
math_def FORCEINLINE float p_log(float value) {
    return logf(value);
}

// float16: uses hlog on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates logf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_log(float16 val) {
#ifdef NATIVE_HALFS
    return hlog(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(logf(asFloat));
#endif
}

// double: direct call to log.
template <>
math_def FORCEINLINE double p_log(double value) {
    return log(value);
}

// Any other T: converted to float, passed through logf, converted back to T.
template <typename T>
math_def FORCEINLINE T p_log(T value) {
    const float asFloat = static_cast<float>(value);
    return static_cast<T>(logf(asFloat));
}
/////////
// p_floor specializations for float, float16 and bfloat16.

// float: direct call to floorf.
template <>
math_def FORCEINLINE float p_floor(float value) {
    return floorf(value);
}

// float16: uses hfloor on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates floorf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_floor(float16 val) {
#ifdef NATIVE_HALFS
    return hfloor(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(floorf(asFloat));
#endif
}

// bfloat16: evaluated in float via floorf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_floor(bfloat16 value) {
    const float asFloat = static_cast<float>(value);
    return static_cast<bfloat16>(floorf(asFloat));
}
2019-06-06 14:21:15 +02:00
// p_floor for double: direct call to floor.
template <>
math_def FORCEINLINE double p_floor(double value) {
    return floor(value);
}

// p_floor for every type without a dedicated specialization: the input is
// returned unchanged.
template <typename T>
math_def FORCEINLINE T p_floor(T value) {
    return value;
}
/////////
// p_ceil specializations for float, float16 and bfloat16.

// float: direct call to ceilf.
template <>
math_def FORCEINLINE float p_ceil(float value) {
    return ceilf(value);
}

// float16: uses hceil on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates ceilf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_ceil(float16 val) {
#ifdef NATIVE_HALFS
    return hceil(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(ceilf(asFloat));
#endif
}

// bfloat16: evaluated in float via ceilf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_ceil(bfloat16 value) {
    const float asFloat = static_cast<float>(value);
    return static_cast<bfloat16>(ceilf(asFloat));
}
2019-06-06 14:21:15 +02:00
// p_ceil for double: direct call to ceil.
template <>
math_def FORCEINLINE double p_ceil(double value) {
    return ceil(value);
}

// p_ceil for every type without a dedicated specialization: the input is
// returned unchanged.
template <typename T>
math_def FORCEINLINE T p_ceil(T value) {
    return value;
}
/////////
// p_round specializations for float, float16 and bfloat16.

// float: direct call to roundf.
template <>
math_def FORCEINLINE float p_round(float value) {
    return roundf(value);
}

// float16: evaluated in float via roundf, then converted back.
template <>
math_def FORCEINLINE float16 p_round(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(roundf(asFloat));
}

// bfloat16: evaluated in float via roundf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_round(bfloat16 value) {
    const float asFloat = static_cast<float>(value);
    return static_cast<bfloat16>(roundf(asFloat));
}
2019-06-06 14:21:15 +02:00
// p_round for double: direct call to round.
template <>
math_def FORCEINLINE double p_round(double value) {
    return round(value);
}

// p_round for every type without a dedicated specialization: the input is
// returned unchanged.
template <typename T>
math_def FORCEINLINE T p_round(T value) {
    return value;
}
/////////
// p_rint: rounds to an integral value via rintf/rint, specialized per
// element type.

// float: direct call to rintf.
template <>
math_def FORCEINLINE float p_rint(float value) {
    return rintf(value);
}

// float16: uses hrint on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates rintf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_rint(float16 val) {
#ifdef NATIVE_HALFS
    return hrint(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(rintf(asFloat));
#endif
}

// bfloat16: evaluated in float via rintf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_rint(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(rintf(asFloat));
}

// double: direct call to rint.
template <>
math_def FORCEINLINE double p_rint(double value) {
    return rint(value);
}

// Any other T: the input is returned unchanged.
template <typename T>
math_def FORCEINLINE T p_rint(T value) {
    return value;
}
/////////
// p_cos: cosine, specialized per element type.

// float: direct call to cosf.
template <>
math_def FORCEINLINE float p_cos(float value) {
    return cosf(value);
}

// float16: uses hcos on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates cosf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_cos(float16 val) {
#ifdef NATIVE_HALFS
    return hcos(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(cosf(asFloat));
#endif
}

// bfloat16: evaluated in float via cosf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_cos(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(cosf(asFloat));
}

// double: direct call to cos.
template <>
math_def FORCEINLINE double p_cos(double value) {
    return cos(value);
}
/////////
// p_sin: sine, specialized per element type.

// float: direct call to sinf.
template <>
math_def FORCEINLINE float p_sin(float value) {
    return sinf(value);
}

// float16: uses hsin on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates sinf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_sin(float16 val) {
#ifdef NATIVE_HALFS
    return hsin(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(sinf(asFloat));
#endif
}

// bfloat16: evaluated in float via sinf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_sin(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(sinf(asFloat));
}

// double: direct call to sin.
template <>
math_def FORCEINLINE double p_sin(double value) {
    return sin(value);
}
/////////
// p_sqrt specializations for float and float16.

// float: direct call to sqrtf.
template <>
math_def FORCEINLINE float p_sqrt(float value) {
    return sqrtf(value);
}

// float16: uses hsqrt on the raw half when NATIVE_HALFS is defined;
// otherwise evaluates sqrtf in float and converts the result back.
template <>
math_def FORCEINLINE float16 p_sqrt(float16 val) {
#ifdef NATIVE_HALFS
    return hsqrt(val.data);
#else
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(sqrtf(asFloat));
#endif
}
// p_sqrt for bfloat16: evaluated in float via sqrtf, then converted straight
// back to bfloat16.
// The result used to be cast to float16 first and only then implicitly
// converted to the bfloat16 return type. That extra hop through a different
// 16-bit format was a copy/paste slip (every other bfloat16 specialization
// casts to bfloat16) and could needlessly narrow the value on the way out.
template <>
math_def FORCEINLINE bfloat16 p_sqrt(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(sqrtf(asFloat));
}
// p_sqrt for double: direct call to sqrt.
template <>
math_def FORCEINLINE double p_sqrt(double value) {
    const double root = sqrt(value);
    return root;
}
/////////
// p_tanh: hyperbolic tangent, specialized per element type.

// float: direct call to tanhf.
template <>
math_def FORCEINLINE float p_tanh(float value) {
    return tanhf(value);
}

// float16: evaluated in float via tanhf, then converted back.
template <>
math_def FORCEINLINE float16 p_tanh(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(tanhf(asFloat));
}

// bfloat16: evaluated in float via tanhf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_tanh(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(tanhf(asFloat));
}

// double: direct call to tanh.
template <>
math_def FORCEINLINE double p_tanh(double value) {
    return tanh(value);
}
/////////
// p_erf: error function, specialized per element type.

// float: direct call to erff.
template <>
math_def FORCEINLINE float p_erf(float value) {
    return erff(value);
}

// float16: evaluated in float via erff, then converted back.
template <>
math_def FORCEINLINE float16 p_erf(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(erff(asFloat));
}

// bfloat16: evaluated in float via erff, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_erf(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(erff(asFloat));
}

// double: direct call to erf.
template <>
math_def FORCEINLINE double p_erf(double value) {
    return erf(value);
}
/////////
// p_erfc: complementary error function, specialized per element type.

// float: direct call to erfcf.
template <>
math_def FORCEINLINE float p_erfc(float value) {
    return erfcf(value);
}

// float16: evaluated in float via erfcf, then converted back.
template <>
math_def FORCEINLINE float16 p_erfc(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(erfcf(asFloat));
}

// bfloat16: evaluated in float via erfcf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_erfc(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(erfcf(asFloat));
}

// double: direct call to erfc.
template <>
math_def FORCEINLINE double p_erfc(double value) {
    return erfc(value);
}
/////////
// p_acos: arc cosine, specialized per element type.

// float: direct call to acosf.
template <>
math_def FORCEINLINE float p_acos(float value) {
    return acosf(value);
}

// float16: evaluated in float via acosf, then converted back.
template <>
math_def FORCEINLINE float16 p_acos(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(acosf(asFloat));
}

// bfloat16: evaluated in float via acosf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_acos(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(acosf(asFloat));
}

// double: direct call to acos.
template <>
math_def FORCEINLINE double p_acos(double value) {
    return acos(value);
}
/////////
// p_sinh: hyperbolic sine, specialized per element type.

// float: direct call to sinhf.
template <>
math_def FORCEINLINE float p_sinh(float value) {
    return sinhf(value);
}

// float16: evaluated in float via sinhf, then converted back.
template <>
math_def FORCEINLINE float16 p_sinh(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(sinhf(asFloat));
}

// bfloat16: evaluated in float via sinhf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_sinh(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(sinhf(asFloat));
}

// double: direct call to sinh.
template <>
math_def FORCEINLINE double p_sinh(double value) {
    return sinh(value);
}
/////////
// p_acosh: inverse hyperbolic cosine, specialized per element type.

// float: direct call to acoshf.
template <>
math_def FORCEINLINE float p_acosh(float value) {
    return acoshf(value);
}

// float16: evaluated in float via acoshf, then converted back.
template <>
math_def FORCEINLINE float16 p_acosh(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(acoshf(asFloat));
}

// bfloat16: evaluated in float via acoshf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_acosh(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(acoshf(asFloat));
}

// double: direct call to acosh.
template <>
math_def FORCEINLINE double p_acosh(double value) {
    return acosh(value);
}
/////////
// p_cosh: hyperbolic cosine, specialized per element type.

// float: direct call to coshf.
template <>
math_def FORCEINLINE float p_cosh(float value) {
    return coshf(value);
}

// float16: evaluated in float via coshf, then converted back.
template <>
math_def FORCEINLINE float16 p_cosh(float16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<float16>(coshf(asFloat));
}

// bfloat16: evaluated in float via coshf, then converted back.
template <>
math_def FORCEINLINE bfloat16 p_cosh(bfloat16 val) {
    const float asFloat = static_cast<float>(val);
    return static_cast<bfloat16>(coshf(asFloat));
}

// double: direct call to cosh.
template <>
math_def FORCEINLINE double p_cosh(double value) {
    return cosh(value);
}
/////////
template <>
math_def FORCEINLINE float p_asin(float value) {
return asinf(value);
}
template <>
math_def FORCEINLINE float16 p_asin(float16 val) {
return static_cast<float16>(asinf((float) val));
}
template <>
math_def FORCEINLINE bfloat16 p_asin(bfloat16 val) {
return static_cast<bfloat16>(asinf((float) val));
}
// Arc sine, double precision: forwards the argument to asin().
template <>
math_def FORCEINLINE double p_asin(double value) {
    const double result = asin(value);
    return result;
}
/////////
// Arc tangent, single precision: forwards the argument to atanf().
template <>
math_def FORCEINLINE float p_atan(float value) {
    const float result = atanf(value);
    return result;
}
// Arc tangent for float16: the argument is widened to float, evaluated with
// atanf(), and the result is converted back to float16.
template <>
math_def FORCEINLINE float16 p_atan(float16 val) {
    const float widened = static_cast<float>(val);
    return static_cast<float16>(atanf(widened));
}
// Arc tangent for bfloat16: the argument is widened to float, evaluated with
// atanf(), and the result is converted back to bfloat16.
template <>
math_def FORCEINLINE bfloat16 p_atan(bfloat16 val) {
    const float widened = static_cast<float>(val);
    return static_cast<bfloat16>(atanf(widened));
}
// Arc tangent, double precision: forwards the argument to atan().
template <>
math_def FORCEINLINE double p_atan(double value) {
    const double result = atan(value);
    return result;
}
/////////
// Tangent, single precision: forwards the argument to tanf().
template <>
math_def FORCEINLINE float p_tan(float value) {
    const float result = tanf(value);
    return result;
}
// Tangent for float16: the argument is widened to float, evaluated with
// tanf(), and the result is converted back to float16.
template <>
math_def FORCEINLINE float16 p_tan(float16 val) {
    const float widened = static_cast<float>(val);
    return static_cast<float16>(tanf(widened));
}
// Tangent for bfloat16: the argument is widened to float, evaluated with
// tanf(), and the result is converted back to bfloat16.
template <>
math_def FORCEINLINE bfloat16 p_tan(bfloat16 val) {
    const float widened = static_cast<float>(val);
    return static_cast<bfloat16>(tanf(widened));
}
// Tangent, double precision: forwards the argument to tan().
template <>
math_def FORCEINLINE double p_tan(double value) {
    const double result = tan(value);
    return result;
}
/////////
// Inverse hyperbolic tangent, single precision: forwards the argument to atanhf().
template <>
math_def FORCEINLINE float p_atanh(float value) {
    const float result = atanhf(value);
    return result;
}
// Inverse hyperbolic tangent for float16: the argument is widened to float,
// evaluated with atanhf(), and the result is converted back to float16.
template <>
math_def FORCEINLINE float16 p_atanh(float16 val) {
    const float widened = static_cast<float>(val);
    return static_cast<float16>(atanhf(widened));
}
// Inverse hyperbolic tangent for bfloat16: the argument is widened to float,
// evaluated with atanhf(), and the result is converted back to bfloat16.
template <>
math_def FORCEINLINE bfloat16 p_atanh(bfloat16 val) {
    const float widened = static_cast<float>(val);
    return static_cast<bfloat16>(atanhf(widened));
}
// Inverse hyperbolic tangent, double precision: forwards the argument to atanh().
template <>
math_def FORCEINLINE double p_atanh(double value) {
    const double result = atanh(value);
    return result;
}
/////////
// Bitwise left rotation of `value` by `shift` positions.
// This is only the primary template declaration; it has no generic definition.
// The explicit specializations that follow in this header (int8_t, uint8_t,
// int16_t, uint16_t, int, uint32_t, Nd4jLong, uint64_t) supply the bodies.
template <typename T>
math_def FORCEINLINE T _rotate_left(T value, T shift);
// Bitwise right rotation of `value` by `shift` positions.
// This is only the primary template declaration; it has no generic definition.
// The explicit specializations that follow in this header (int8_t, uint8_t,
// int16_t, uint16_t, int, uint32_t, Nd4jLong, uint64_t) supply the bodies.
template <typename T>
math_def FORCEINLINE T _rotate_right(T value, T shift);
// Rotates the 8-bit pattern of `value` left by `shift` positions (count taken modulo 8).
// The work is done on the unsigned bit pattern: with the former signed expression,
// integer promotion sign-extended negative inputs, so the right-shifted half smeared
// 1-bits over the result (rotating 0x80 left by 1 gave 0xFF instead of 0x01), and a
// count outside [0, 8] produced a negative shift amount (undefined behaviour).
template <>
math_def FORCEINLINE int8_t _rotate_left(int8_t value, int8_t shift) {
    const uint32_t bits = static_cast<uint8_t>(value);
    const uint32_t count = static_cast<uint32_t>(shift) & 7u;
    // (8 - count) & 7 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits << count) | (bits >> ((8u - count) & 7u));
    return static_cast<int8_t>(static_cast<uint8_t>(rotated));
}
// Rotates the 8-bit pattern of `value` right by `shift` positions (count taken modulo 8).
// The work is done on the unsigned bit pattern: with the former signed expression the
// arithmetic right shift of a negative input filled the vacated bits with copies of the
// sign bit (rotating 0x80 right by 1 gave 0xC0 instead of 0x40), and a count outside
// [0, 8] produced a negative shift amount (undefined behaviour).
template <>
math_def FORCEINLINE int8_t _rotate_right(int8_t value, int8_t shift) {
    const uint32_t bits = static_cast<uint8_t>(value);
    const uint32_t count = static_cast<uint32_t>(shift) & 7u;
    // (8 - count) & 7 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits >> count) | (bits << ((8u - count) & 7u));
    return static_cast<int8_t>(static_cast<uint8_t>(rotated));
}
// Rotates the 8-bit pattern of `value` left by `shift` positions (count taken modulo 8).
// Results for counts in [0, 8] are unchanged; larger counts used to evaluate a shift by
// a negative amount (undefined behaviour) and now wrap around instead.
template <>
math_def FORCEINLINE uint8_t _rotate_left(uint8_t value, uint8_t shift) {
    const uint32_t bits = value;
    const uint32_t count = shift & 7u;
    // (8 - count) & 7 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits << count) | (bits >> ((8u - count) & 7u));
    return static_cast<uint8_t>(rotated);
}
// Rotates the 8-bit pattern of `value` right by `shift` positions (count taken modulo 8).
// Results for counts in [0, 8] are unchanged; larger counts used to evaluate a shift by
// a negative amount (undefined behaviour) and now wrap around instead.
template <>
math_def FORCEINLINE uint8_t _rotate_right(uint8_t value, uint8_t shift) {
    const uint32_t bits = value;
    const uint32_t count = shift & 7u;
    // (8 - count) & 7 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits >> count) | (bits << ((8u - count) & 7u));
    return static_cast<uint8_t>(rotated);
}
// Rotates the 16-bit pattern of `value` left by `shift` positions (count taken modulo 16).
// The work is done on the unsigned bit pattern: with the former signed expression,
// integer promotion sign-extended negative inputs, so the right-shifted half smeared
// 1-bits over the result (rotating 0x8000 left by 1 gave 0xFFFF instead of 0x0001), and
// a count outside [0, 16] produced a negative shift amount (undefined behaviour).
template <>
math_def FORCEINLINE int16_t _rotate_left(int16_t value, int16_t shift) {
    const uint32_t bits = static_cast<uint16_t>(value);
    const uint32_t count = static_cast<uint32_t>(shift) & 15u;
    // (16 - count) & 15 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits << count) | (bits >> ((16u - count) & 15u));
    return static_cast<int16_t>(static_cast<uint16_t>(rotated));
}
// Rotates the 16-bit pattern of `value` right by `shift` positions (count taken modulo 16).
// The work is done on the unsigned bit pattern: with the former signed expression the
// arithmetic right shift of a negative input filled the vacated bits with copies of the
// sign bit (rotating 0x8000 right by 1 gave 0xC000 instead of 0x4000), and a count
// outside [0, 16] produced a negative shift amount (undefined behaviour).
template <>
math_def FORCEINLINE int16_t _rotate_right(int16_t value, int16_t shift) {
    const uint32_t bits = static_cast<uint16_t>(value);
    const uint32_t count = static_cast<uint32_t>(shift) & 15u;
    // (16 - count) & 15 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits >> count) | (bits << ((16u - count) & 15u));
    return static_cast<int16_t>(static_cast<uint16_t>(rotated));
}
// Rotates the 16-bit pattern of `value` left by `shift` positions (count taken modulo 16).
// Results for counts in [0, 16] are unchanged; larger counts used to evaluate a shift by
// a negative amount (undefined behaviour) and now wrap around instead.
template <>
math_def FORCEINLINE uint16_t _rotate_left(uint16_t value, uint16_t shift) {
    const uint32_t bits = value;
    const uint32_t count = shift & 15u;
    // (16 - count) & 15 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits << count) | (bits >> ((16u - count) & 15u));
    return static_cast<uint16_t>(rotated);
}
// Rotates the 16-bit pattern of `value` right by `shift` positions (count taken modulo 16).
// Results for counts in [0, 16] are unchanged; larger counts used to evaluate a shift by
// a negative amount (undefined behaviour) and now wrap around instead.
template <>
math_def FORCEINLINE uint16_t _rotate_right(uint16_t value, uint16_t shift) {
    const uint32_t bits = value;
    const uint32_t count = shift & 15u;
    // (16 - count) & 15 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits >> count) | (bits << ((16u - count) & 15u));
    return static_cast<uint16_t>(rotated);
}
// Rotates the 32-bit pattern of `value` left by `shift` positions (count taken modulo 32).
// The work is done on the unsigned bit pattern because the former signed expression
//  - shifted by the full width when shift == 0 (`value >> 32`, undefined behaviour), and
//  - used an arithmetic right shift, so negative inputs had the vacated bits filled
//    with sign-bit copies (rotating 0x80000000 left by 1 gave 0xFFFFFFFF, not 1).
// Like the original, this assumes `int` is 32 bits wide.
template <>
math_def FORCEINLINE int _rotate_left(int value, int shift) {
    const uint32_t bits = static_cast<uint32_t>(value);
    const uint32_t count = static_cast<uint32_t>(shift) & 31u;
    // (32 - count) & 31 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits << count) | (bits >> ((32u - count) & 31u));
    return static_cast<int>(rotated);
}
// Rotates the 32-bit pattern of `value` right by `shift` positions (count taken modulo 32).
// The work is done on the unsigned bit pattern because the former signed expression
//  - shifted by the full width when shift == 0 (`value << 32`, undefined behaviour), and
//  - used an arithmetic right shift, so negative inputs had the vacated bits filled
//    with sign-bit copies (rotating 0x80000000 right by 1 gave 0xC0000000, not 0x40000000).
// Like the original, this assumes `int` is 32 bits wide.
template <>
math_def FORCEINLINE int _rotate_right(int value, int shift) {
    const uint32_t bits = static_cast<uint32_t>(value);
    const uint32_t count = static_cast<uint32_t>(shift) & 31u;
    // (32 - count) & 31 keeps the complementary shift at 0 when count is 0.
    const uint32_t rotated = (bits >> count) | (bits << ((32u - count) & 31u));
    return static_cast<int>(rotated);
}
// Rotates the 32-bit pattern of `value` left by `shift` positions (count taken modulo 32).
// The count is normalized first: the former expression evaluated `value >> 32` when
// shift == 0, and shifting by the full width of the type is undefined behaviour.
template <>
math_def FORCEINLINE uint32_t _rotate_left(uint32_t value, uint32_t shift) {
    const uint32_t count = shift & 31u;
    // (32 - count) & 31 keeps the complementary shift at 0 when count is 0.
    return (value << count) | (value >> ((32u - count) & 31u));
}
// Rotates the 32-bit pattern of `value` right by `shift` positions (count taken modulo 32).
// The count is normalized first: the former expression evaluated `value << 32` when
// shift == 0, and shifting by the full width of the type is undefined behaviour.
template <>
math_def FORCEINLINE uint32_t _rotate_right(uint32_t value, uint32_t shift) {
    const uint32_t count = shift & 31u;
    // (32 - count) & 31 keeps the complementary shift at 0 when count is 0.
    return (value >> count) | (value << ((32u - count) & 31u));
}
// Rotates the 64-bit pattern of `value` left by `shift` positions (count taken modulo 64).
// The work is done on the unsigned bit pattern because the former signed expression
//  - shifted by the full width when shift == 0 (`value >> 64`, undefined behaviour), and
//  - used an arithmetic right shift, so negative inputs had the vacated bits filled
//    with sign-bit copies instead of the bits rotated out at the top.
// NOTE(review): the uint64_t specialization forwards to this one under SD_ARM_BUILD to
// dodge a gcc issue - confirm this formulation builds cleanly on the ARM toolchains.
template <>
math_def FORCEINLINE Nd4jLong _rotate_left(Nd4jLong value, Nd4jLong shift) {
    const uint64_t bits = static_cast<uint64_t>(value);
    const uint64_t count = static_cast<uint64_t>(shift) & 63u;
    // (64 - count) & 63 keeps the complementary shift at 0 when count is 0.
    const uint64_t rotated = (bits << count) | (bits >> ((64u - count) & 63u));
    return static_cast<Nd4jLong>(rotated);
}
// Rotates the 64-bit pattern of `value` right by `shift` positions (count taken modulo 64).
// The work is done on the unsigned bit pattern because the former signed expression
//  - shifted by the full width when shift == 0 (`value << 64`, undefined behaviour), and
//  - used an arithmetic right shift, so negative inputs had the vacated bits filled
//    with sign-bit copies instead of leaving room for the bits rotated in from the bottom.
// NOTE(review): the uint64_t specialization forwards to this one under SD_ARM_BUILD to
// dodge a gcc issue - confirm this formulation builds cleanly on the ARM toolchains.
template <>
math_def FORCEINLINE Nd4jLong _rotate_right(Nd4jLong value, Nd4jLong shift) {
    const uint64_t bits = static_cast<uint64_t>(value);
    const uint64_t count = static_cast<uint64_t>(shift) & 63u;
    // (64 - count) & 63 keeps the complementary shift at 0 when count is 0.
    const uint64_t rotated = (bits >> count) | (bits << ((64u - count) & 63u));
    return static_cast<Nd4jLong>(rotated);
}
// Rotates the 64-bit pattern of `value` left by `shift` positions (count taken modulo 64).
// A normalized count of zero returns early: the former expression then evaluated
// `value >> 64`, and shifting by the full width of the type is undefined behaviour.
template <>
math_def FORCEINLINE uint64_t _rotate_left(uint64_t value, uint64_t shift) {
    const uint64_t count = shift & 63u;
    if (count == 0)
        return value;
#ifdef SD_ARM_BUILD
    // TODO: eventually remove this once gcc fixes the bug
    // Workaround path: route through the signed specialization. Plain value conversions
    // replace the earlier pointer punning between uint64_t and Nd4jLong objects.
    // NOTE(review): the result on this path is whatever the Nd4jLong specialization
    // yields - confirm it is a true rotation for inputs with the top bit set.
    const Nd4jLong rotated = _rotate_left<Nd4jLong>(static_cast<Nd4jLong>(value), static_cast<Nd4jLong>(count));
    return static_cast<uint64_t>(rotated);
#else
    // count is in [1, 63] here, so both shift amounts are in range.
    return (value << count) | (value >> (64u - count));
#endif
}
// Rotates the 64-bit pattern of `value` right by `shift` positions (count taken modulo 64).
// A normalized count of zero returns early: the former expression then evaluated
// `value << 64`, and shifting by the full width of the type is undefined behaviour.
template <>
math_def FORCEINLINE uint64_t _rotate_right(uint64_t value, uint64_t shift) {
    const uint64_t count = shift & 63u;
    if (count == 0)
        return value;
#ifdef SD_ARM_BUILD
    // TODO: eventually remove this once gcc fixes the bug
    // Workaround path: route through the signed specialization. Plain value conversions
    // replace the earlier pointer punning between uint64_t and Nd4jLong objects.
    // NOTE(review): the result on this path is whatever the Nd4jLong specialization
    // yields - confirm it is a true rotation for inputs with the top bit set.
    const Nd4jLong rotated = _rotate_right<Nd4jLong>(static_cast<Nd4jLong>(value), static_cast<Nd4jLong>(count));
    return static_cast<uint64_t>(rotated);
#else
    // count is in [1, 63] here, so both shift amounts are in range.
    return (value >> count) | (value << (64u - count));
#endif
}
// Left-rotation wrapper: hands both arguments to the _rotate_left
// specialization selected by T and returns its result.
template <typename T>
math_def FORCEINLINE T p_rotl(T value, T shift) {
    const T rotated = _rotate_left<T>(value, shift);
    return rotated;
}
// Right-rotation wrapper: hands both arguments to the _rotate_right
// specialization selected by T and returns its result.
template <typename T>
math_def FORCEINLINE T p_rotr(T value, T shift) {
    const T rotated = _rotate_right<T>(value, shift);
    return rotated;
}
// (stray blame timestamp left by page extraction: 2019-06-06 14:21:15 +02:00)
}
}
#endif // LIBND4J_PLATFORM_MATH_H