2019-06-06 14:21:15 +02:00
/*******************************************************************************
* Copyright ( c ) 2015 - 2018 Skymind , Inc .
*
* This program and the accompanying materials are made available under the
* terms of the Apache License , Version 2.0 which is available at
* https : //www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS , WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied . See the
* License for the specific language governing permissions and limitations
* under the License .
*
* SPDX - License - Identifier : Apache - 2.0
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
//
// Created by agibsonccc on 2/21/16.
//
# define __STDC_CONSTANT_MACROS
# include "../NativeOps.h"
# include "NativeOpExecutioner.h"
# include "../NDArray.h"
# include "../GraphExecutioner.h"
# include <graph/GraphHolder.h>
# include <templatemath.h>
# include <types/float8.h>
# include <loops/type_conversions.h>
# include <loops/aggregates.h>
# include <helpers/helper_ptrmap.h>
# include <helpers/logger.h>
# include <pointercast.h>
# include <pairwise_util.h>
# include <types/types.h>
# include <ops/declarable/helpers/transforms.h>
# include <exceptions/allocation_exception.h>
# include <fcntl.h>
# include <stdio.h>
# include <stdlib.h>
# ifndef _WIN32
# include <unistd.h>
# include <sys/mman.h>
# else
# include <io.h>
# include <helpers/mman.h>
# endif
# include <sys/types.h>
# include <ops/declarable/CustomOperations.h>
# include <errno.h>
char * name ;
bool nameSet = false ;
# ifdef __ND4J_EXPERIMENTAL__
bool experimentalSupport = true ;
# else
bool experimentalSupport = false ;
# endif
# include <ops/specials.h>
# include "../Environment.h"
# include <TAD.h>
# include <ops/declarable/OpRegistrator.h>
# include <graph/Context.h>
# include <graph/ResultWrapper.h>
# include <helpers/DebugHelper.h>
# include <helpers/ConstantTadHelper.h>
using namespace nd4j ;
NativeOps : : NativeOps ( ) {
//
}
void NativeOps : : setElementThreshold ( int num ) {
if ( num > 0 )
nd4j : : Environment : : getInstance ( ) - > setElementwiseThreshold ( num ) ;
}
void NativeOps : : setTADThreshold ( int num ) {
if ( num > 0 )
nd4j : : Environment : : getInstance ( ) - > setTadThreshold ( num ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
*/
void NativeOps : : execIndexReduceScalar ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execIndexReduceScalar ( nullptr , opNum , hX , hXShapeInfo , dX , dXShapeInfo , extraParams , hZ , hZShapeInfo , dZ , dZShapeInfo ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
* @ param hZ
* @ param hZShapeInfo
* @ param dimension
* @ param dimensionLength
*/
void NativeOps : : execIndexReduce ( Nd4jPointer * extraPointers , int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPack . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPack . primaryOffsets ( ) ;
auto hz = reinterpret_cast < Nd4jLong * > ( hZ ) ;
NativeOpExecutioner : : execIndexReduce ( nullptr , opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hz ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
dimension ,
dimensionLength ,
hTADShapeInfo ,
hTADOffsets ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param hY
* @ param hYShapeInfo
* @ param hZ
* @ param hZShapeInfo
* @ param dimension
* @ param dimensionLength
*/
void NativeOps : : execBroadcast ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPackX = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto tadPackZ = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hZShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPackX . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPackX . primaryOffsets ( ) ;
auto hTADShapeInfoZ = tadPackZ . primaryShapeInfo ( ) ;
auto hTADOffsetsZ = tadPackZ . primaryOffsets ( ) ;
NativeOpExecutioner : : execBroadcast ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hY ,
hYShapeInfo ,
dY ,
dYShapeInfo ,
hZ , hZShapeInfo ,
dZ , dZShapeInfo ,
dimension ,
dimensionLength , hTADShapeInfo , hTADOffsets , hTADShapeInfoZ , hTADOffsetsZ ) ;
}
void NativeOps : : execBroadcastBool ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPackX = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto tadPackZ = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hZShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPackX . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPackX . primaryOffsets ( ) ;
auto hTADShapeInfoZ = tadPackZ . primaryShapeInfo ( ) ;
auto hTADOffsetsZ = tadPackZ . primaryOffsets ( ) ;
NativeOpExecutioner : : execBroadcastBool ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hY ,
hYShapeInfo ,
dY ,
dYShapeInfo ,
hZ , hZShapeInfo ,
dZ , dZShapeInfo ,
dimension ,
dimensionLength , hTADShapeInfo , hTADOffsets , hTADShapeInfoZ , hTADOffsetsZ ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param hY
* @ param hYShapeInfo
* @ param hZ
* @ param hZShapeInfo
* @ param extraParams
* @ param n
*/
void NativeOps : : execPairwiseTransform (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execPairwiseTransform ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hY ,
hYShapeInfo ,
dY ,
dYShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ) ;
}
void NativeOps : : execPairwiseTransformBool (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execPairwiseBoolTransform ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hY ,
hYShapeInfo ,
dY ,
dYShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
* @ param hZ
* @ param hZShapeInfo
*/
void NativeOps : : execReduceFloat (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execReduceFloatScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ) ;
}
void NativeOps : : execReduceSame (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execReduceSameScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ) ;
}
void NativeOps : : execReduceBool (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execReduceBoolScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ) ;
}
void NativeOps : : execReduceLong (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execReduceLongScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
* @ param hZ
* @ param hZShapeInfo
*/
void NativeOps : : execReduceFloat ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPackX = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPackX . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPackX . primaryOffsets ( ) ;
NativeOpExecutioner : : execReduceFloat ( nullptr , opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
dimension ,
dimensionLength ,
hTADShapeInfo ,
hTADOffsets ) ;
}
void NativeOps : : execReduceBool ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPack . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPack . primaryOffsets ( ) ;
NativeOpExecutioner : : execReduceBool ( nullptr , opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
dimension ,
dimensionLength ,
hTADShapeInfo ,
hTADOffsets ) ;
}
void NativeOps : : execReduceSame ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPack . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPack . primaryOffsets ( ) ;
NativeOpExecutioner : : execReduceSame ( nullptr , opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
dimension ,
dimensionLength ,
hTADShapeInfo ,
hTADOffsets ) ;
}
void NativeOps : : execReduceLong ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
auto tadPack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPack . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPack . primaryOffsets ( ) ;
NativeOpExecutioner : : execReduceLong ( nullptr , opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
dimension ,
dimensionLength ,
hTADShapeInfo ,
hTADOffsets ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParamsVals
* @ param hY
* @ param hYShapeInfo
* @ param hZ
* @ param hZShapeInfo
*/
void NativeOps : : execReduce3 ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execReduce3 ( nullptr , opNum , hX , hXShapeInfo , dX , dXShapeInfo , extraParams , hY , hYShapeInfo , dY , dYShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParamsVals
* @ param hY
* @ param hYShapeInfo
*/
void NativeOps : : execReduce3Scalar ( Nd4jPointer * extraPointers , int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ) {
NativeOpExecutioner : : execReduce3Scalar ( nullptr , opNum , hX , hXShapeInfo , dX , dXShapeInfo , extraParams , hY , hYShapeInfo , dY , dYShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParamsVals
* @ param hY
* @ param hYShapeInfo
* @ param hZ
* @ param hZShapeInfo
* @ param dimension
* @ param dimensionLength
*/
void NativeOps : : execReduce3 ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ,
Nd4jLong * tadOnlyShapeInfo , Nd4jLong * tadOffsets ,
Nd4jLong * yTadOnlyShapeInfo , Nd4jLong * yTadOffsets ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
if ( extraPointers = = nullptr | | extraPointers [ 2 ] = = 0 ) {
NativeOpExecutioner : : execReduce3 ( LaunchContext : : defaultContext ( ) , opNum , hX , hXShapeInfo , dX , dXShapeInfo , extraParams , hY , hYShapeInfo , dY , dYShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo , dimension , dimensionLength , tadOnlyShapeInfo , tadOffsets , yTadOnlyShapeInfo , yTadOffsets ) ;
} else {
// going tad-way
auto tadPack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
auto hTADShapeInfo = tadPack . primaryShapeInfo ( ) ;
auto hTADOffsets = tadPack . primaryOffsets ( ) ;
NativeOpExecutioner : : execReduce3TAD ( LaunchContext : : defaultContext ( ) , opNum , hX , hXShapeInfo , dX , dXShapeInfo , extraParams , hY , hYShapeInfo , dY , dYShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo , dimension , dimensionLength , hTADShapeInfo , hTADOffsets , nullptr , nullptr ) ;
}
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param hZ
* @ param hZShapeInfo
* @ param hScalar
* @ param extraParams
* @ param n
*/
void NativeOps : : execScalar (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hScalar , Nd4jLong * hScalarShapeInfo ,
void * dScalar , Nd4jLong * dScalarShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
hScalar ,
hScalarShapeInfo ,
dScalar ,
dScalarShapeInfo ,
extraParams ) ;
}
void NativeOps : : execScalarBool (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hScalar , Nd4jLong * hScalarShapeInfo ,
void * dScalar , Nd4jLong * dScalarShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execScalarBool ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
hScalar ,
hScalarShapeInfo ,
dScalar ,
dScalarShapeInfo ,
extraParams ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
*/
void NativeOps : : execSummaryStatsScalar ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
bool biasCorrected ) {
NativeOpExecutioner : : execSummaryStatsScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
biasCorrected ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
* @ param hZ
* @ param hZShapeInfo
*/
void NativeOps : : execSummaryStats ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
bool biasCorrected ) {
NativeOpExecutioner : : execSummaryStats ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
biasCorrected ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param extraParams
* @ param hZ
* @ param hZShapeInfo
* @ param dimension
* @ param dimensionLength
*/
void NativeOps : : execSummaryStats ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParams ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ,
bool biasCorrected ,
Nd4jLong * tadShapeInfo , Nd4jLong * tadOffsets ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
NativeOpExecutioner : : execSummaryStats ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
dimension ,
dimensionLength ,
tadShapeInfo ,
tadOffsets ,
biasCorrected ) ;
}
/**
*
* @ param opNum
* @ param hX
* @ param hXShapeInfo
* @ param hZ
* @ param hZShapeInfo
* @ param extraParams
* @ param n
*/
void NativeOps : : execTransformFloat (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execTransformFloat ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dZ ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ,
nullptr ,
nullptr ) ;
}
void NativeOps : : execTransformSame (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execTransformSame ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ,
nullptr ,
nullptr ) ;
}
void NativeOps : : execTransformBool (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execTransformBool ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ,
nullptr ,
nullptr ) ;
}
void NativeOps : : execTransformAny (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execTransformAny ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ,
nullptr ,
nullptr ) ;
}
void NativeOps : : execTransformStrict (
Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraParams ) {
NativeOpExecutioner : : execTransformStrict ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
extraParams ,
nullptr ,
nullptr ) ;
}
void NativeOps : : execReduce3All ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * extraParamsVals ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ,
Nd4jLong * xTadShapeInfo ,
Nd4jLong * xOffsets ,
Nd4jLong * yTadShapeInfo ,
Nd4jLong * yOffsets ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
NativeOpExecutioner : : execReduce3All ( nullptr , opNum , hX , hXShapeInfo , dX , dXShapeInfo , extraParamsVals , hY , hYShapeInfo , dY , dYShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo , dimension , dimensionLength , xTadShapeInfo , xOffsets , yTadShapeInfo , yOffsets ) ;
}
template < typename T >
void flattenGeneric ( Nd4jPointer * extraPointers ,
int offset ,
char order ,
void * vresult ,
Nd4jLong * hZShapeInfo ,
void * vinput ,
Nd4jLong * inputShapeInfo ) {
auto hZ = reinterpret_cast < T * > ( vresult ) ;
auto input = reinterpret_cast < T * > ( vinput ) ;
int numOnes = 0 ;
auto shape = shape : : shapeOf ( inputShapeInfo ) ;
int wholeRank = shape : : rank ( inputShapeInfo ) ;
for ( int i = 0 ; i < wholeRank ; i + + ) {
if ( shape [ i ] = = 1 )
numOnes + + ;
}
//start at the given offset
hZ + = offset ;
char inputOrder = shape : : order ( inputShapeInfo ) ;
auto len = shape : : length ( inputShapeInfo ) ;
auto resultEleStride = shape : : elementWiseStride ( hZShapeInfo ) ;
auto inputEleStride = shape : : elementWiseStride ( inputShapeInfo ) ;
Nd4jLong numTads , stride ;
int dimension , dimensionLength ;
int rank = shape : : rank ( inputShapeInfo ) ;
auto xStride = shape : : stride ( inputShapeInfo ) ;
auto xShape = shape : : shapeOf ( inputShapeInfo ) ;
dimensionLength = 1 ;
if ( order = = ' f ' ) {
dimension = 0 ;
}
else {
dimension = rank - 1 ;
}
stride = xStride [ dimension ] ;
// numTads is product of length of all dimensions excluding
// the one we do the tad on
numTads = 1 ;
for ( int i = 0 ; i < rank ; i + + ) {
if ( i ! = dimension )
numTads * = xShape [ i ] ;
}
if ( inputOrder = = order ) {
if ( resultEleStride = = 1 & & inputEleStride = = 1 ) {
memcpy ( hZ , input , len * sizeof ( T ) ) ;
}
else if ( resultEleStride > = 1 & & inputEleStride > = 1 ) {
if ( len < ELEMENT_THRESHOLD ) {
PRAGMA_OMP_SIMD
for ( int i = 0 ; i < len ; i + + ) {
hZ [ i * resultEleStride ] = input [ i * inputEleStride ] ;
}
}
else {
PRAGMA_OMP_PARALLEL_FOR_SIMD
for ( int i = 0 ; i < len ; i + + ) {
hZ [ i * resultEleStride ] = input [ i * inputEleStride ] ;
}
}
}
else {
int idx = 0 ;
for ( int i = 0 ; i < len ; i + + )
hZ [ idx + + ] = input [ shape : : getIndexOffset ( i , inputShapeInfo , len ) ] ;
}
}
else {
int rank = shape : : rank ( inputShapeInfo ) ;
auto xShape = shape : : shapeOf ( inputShapeInfo ) ;
auto tadShape = xShape [ dimension ] ;
auto tadPack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( inputShapeInfo , dimension ) ;
PRAGMA_OMP_PARALLEL_FOR
for ( int i = 0 ; i < numTads ; i + + ) {
Nd4jLong resultOffset ;
if ( order = = ' f ' ) {
// 1. get c ordering coordinates
auto cIndexCoordinates = new Nd4jLong [ rank - 1 ] ;
int divisor = 1 ;
for ( int dim = rank - 1 ; dim > 0 ; dim - - ) {
cIndexCoordinates [ dim - 1 ] = ( i / divisor ) % xShape [ dim ] ;
divisor * = xShape [ dim ] ;
}
// 2. convert to f ordering index
int fIndex = 0 ;
int multiplier = 1 ;
for ( int dim = 1 ; dim < = rank - 1 ; dim + + ) {
fIndex + = cIndexCoordinates [ dim - 1 ] * multiplier ;
multiplier * = xShape [ dim ] ;
}
resultOffset = fIndex * tadShape ;
delete [ ] cIndexCoordinates ;
}
else {
resultOffset = i * tadShape ;
}
auto tadOffset = tadPack . primaryOffsets ( ) [ i ] ;
for ( int j = 0 ; j < tadShape ; j + + ) {
// TAD are returned in C ordering always
hZ [ resultOffset + j ] = input [ tadOffset + j * stride ] ;
}
}
}
}
/**
* Concatneate multi array of the same shape together
* along a particular dimension
*/
void NativeOps : : concat (
Nd4jPointer * extraPointers ,
int dimension ,
int numArrays ,
Nd4jPointer * data , Nd4jPointer * inputShapeInfo ,
Nd4jPointer * ddata , Nd4jPointer * dinputShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
Nd4jPointer * tadPointers ,
Nd4jPointer * offsetPointers ) {
auto zType = nd4j : : ArrayOptions : : dataType ( hZShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( zType , nd4j : : SpecialMethods , : : concatCpuGeneric ( dimension , numArrays , data , inputShapeInfo , hZ , hZShapeInfo ) , LIBND4J_TYPES ) ;
}
/**
* Concatneate multi array of the same shape together
* along a particular dimension
*/
void NativeOps : : specialConcat (
Nd4jPointer * extraPointers ,
int dimension ,
int numArrays ,
Nd4jPointer * data ,
Nd4jPointer * inputShapeInfo ,
void * hZ ,
Nd4jLong * hZShapeInfo ,
Nd4jPointer * tadPointers ,
Nd4jPointer * offsetPointers ) {
auto zType = nd4j : : ArrayOptions : : dataType ( hZShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( zType , nd4j : : SpecialMethods , : : concatCpuGeneric ( dimension , numArrays , data , inputShapeInfo , hZ , hZShapeInfo ) , LIBND4J_TYPES ) ;
}
/**
* Append an input array
* to the end of a flat array
* in a particular order
* @ param offset the offset of the array to start at
* @ param order the order
* @ param hZ the hZ array
* @ param hZShapeInfo the shape info for te array
* @ param input the input for the array
* @ param inputShapeInfo the shape information for that array
*/
void NativeOps : : flatten (
Nd4jPointer * extraPointers ,
int offset ,
char order ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * input , Nd4jLong * inputShapeInfo ,
void * dinput , Nd4jLong * dinputShapeInfo ) {
auto xType = nd4j : : ArrayOptions : : dataType ( inputShapeInfo ) ;
auto zType = nd4j : : ArrayOptions : : dataType ( hZShapeInfo ) ;
if ( xType ! = zType )
throw std : : runtime_error ( " NativeOps::flatten requires all operands to have same data type " ) ;
BUILD_SINGLE_SELECTOR ( xType , flattenGeneric , ( extraPointers , offset , order , hZ , hZShapeInfo , input , inputShapeInfo ) , LIBND4J_TYPES ) ;
}
/**
* This is dummy method for JNI compatibility
* Since we ' ll use this from java , jni compiler would like to have method no matter what .
*/
void NativeOps : : initializeDevicesAndFunctions ( ) {
}
void NativeOps : : initializeFunctions ( Nd4jPointer * functions ) {
nd4j : : BlasHelper : : getInstance ( ) - > initializeFunctions ( functions ) ;
}
/**
* This method acquires memory chunk of requested size on host side
*
* @ param pointer pointer that ' ll be used for allocation
* @ param memorySize memory size , in bytes
* @ param flags optional parameter
*/
Nd4jPointer NativeOps : : mallocHost ( Nd4jLong memorySize , int flags ) {
Nd4jPointer pointer = ( Nd4jPointer ) malloc ( memorySize ) ;
if ( pointer = = 0 )
return 0L ;
return pointer ;
}
/**
* This method acquires memory chunk of requested size on specified device
*
* PLEASE NOTE : This method is NOT supported and has NO effect in CPU - based backend .
*
* @ param pointer pointer that ' ll be used for allocation
* @ param memorySize memory size , in bytes
* @ param ptrToDeviceId pointer to deviceId . For cuda that ' s just and int , for OpenCL that ' s pointer to device_id , etc
* @ param flags optional parameter
*/
Nd4jPointer NativeOps : : mallocDevice ( Nd4jLong memorySize , int deviceId , int flags ) {
// not supported
return 0L ;
}
/**
* This method releases previously allocated host memory space
*
* @ param pointer pointer that ' ll be freed
*/
int NativeOps : : freeHost ( Nd4jPointer pointer ) {
free ( reinterpret_cast < void * > ( pointer ) ) ;
return 1L ;
}
/**
* This method releases previously allocated memory space on device
*
* PLEASE NOTE : This method is NOT supported and has NO effect in CPU - based backend .
*
* @ param pointer pointer that ' ll be freed
* @ param ptrToDeviceId pointer to deviceId .
*/
int NativeOps : : freeDevice ( Nd4jPointer pointer , int deviceId ) {
// not supported
return 0L ;
}
/**
* Returns the maximum number open mp threads
*/
int NativeOps : : ompGetMaxThreads ( ) {
return omp_get_max_threads ( ) ;
}
/**
* Returns the number open mp threads
*/
int NativeOps : : ompGetNumThreads ( ) {
return omp_get_num_threads ( ) ;
}
/**
* Sets the number of openmp threads
*/
void NativeOps : : setOmpNumThreads ( int threads ) {
omp_set_num_threads ( threads ) ;
}
Nd4jPointer NativeOps : : createContext ( ) {
return 0L ;
}
Nd4jPointer NativeOps : : createStream ( ) {
return 0L ;
}
Nd4jPointer NativeOps : : createEvent ( ) {
return 0L ;
}
int NativeOps : : getDeviceMajor ( int deviceId ) {
return 0 ;
}
int NativeOps : : getDeviceMinor ( int deviceId ) {
return 0 ;
}
int NativeOps : : registerEvent ( Nd4jPointer event , Nd4jPointer stream ) {
return 0L ;
}
int NativeOps : : setDevice ( int deviceId ) {
return 0L ;
}
Nd4jLong NativeOps : : getDeviceFreeMemory ( int deviceId ) {
return 0L ;
}
Nd4jLong NativeOps : : getDeviceFreeMemory ( ) {
return 0L ;
}
Nd4jLong NativeOps : : getDeviceTotalMemory ( int deviceId ) {
return 0L ;
}
int NativeOps : : memcpy ( Nd4jPointer dst , Nd4jPointer src , Nd4jLong size , int flags , Nd4jPointer reserved ) {
return 0L ;
}
int NativeOps : : memcpyAsync ( Nd4jPointer dst , Nd4jPointer src , Nd4jLong size , int flags , Nd4jPointer reserved ) {
return 0L ;
}
int NativeOps : : memset ( Nd4jPointer dst , int value , Nd4jLong size , int flags , Nd4jPointer reserved ) {
return 0L ;
}
int NativeOps : : memsetAsync ( Nd4jPointer dst , int value , Nd4jLong size , int flags , Nd4jPointer reserved ) {
return 0L ;
}
int NativeOps : : destroyEvent ( Nd4jPointer event ) {
return 0L ;
}
int NativeOps : : streamSynchronize ( Nd4jPointer stream ) {
return 0L ;
}
int NativeOps : : eventSynchronize ( Nd4jPointer event ) {
return 0L ;
}
int NativeOps : : getAvailableDevices ( ) {
return 0L ;
}
void NativeOps : : enableDebugMode ( bool reallyEnable ) {
nd4j : : Environment : : getInstance ( ) - > setDebug ( reallyEnable ) ;
}
void NativeOps : : enableVerboseMode ( bool reallyEnable ) {
nd4j : : Environment : : getInstance ( ) - > setVerbose ( reallyEnable ) ;
}
void NativeOps : : setGridLimit ( int gridSize ) {
// no-op
}
nd4j : : TadPack * NativeOps : : tadOnlyShapeInfo ( Nd4jLong * hXShapeInfo , int * dimension , int dimensionLength ) {
auto pack = new TadPack ( ) ;
* pack = nd4j : : ConstantTadHelper : : getInstance ( ) - > tadForDimensions ( hXShapeInfo , dimension , dimensionLength ) ;
return pack ;
}
int NativeOps : : memcpyConstantAsync ( Nd4jLong dst , Nd4jPointer src , Nd4jLong size , int flags , Nd4jPointer reserved ) {
// no-op
return 0L ;
}
Nd4jPointer NativeOps : : getConstantSpace ( ) {
// no-op
return 0L ;
}
template < typename T >
void pullRowsGeneric ( void * vx ,
Nd4jLong * hXShapeInfo ,
void * vz ,
Nd4jLong * hZShapeInfo ,
const int n ,
Nd4jLong * indexes ,
Nd4jLong * tadShapeInfo ,
Nd4jLong * tadOffsets ,
Nd4jLong * zTadShapeInfo ,
Nd4jLong * zTadOffsets ) {
auto hX = reinterpret_cast < T * > ( vx ) ;
auto hZ = reinterpret_cast < T * > ( vz ) ;
const auto xEWS = shape : : elementWiseStride ( tadShapeInfo ) ;
const auto zEWS = shape : : elementWiseStride ( zTadShapeInfo ) ;
const auto tadLength = shape : : length ( tadShapeInfo ) ;
int elementsPerThread = n / TAD_THRESHOLD ;
int _threads = nd4j : : math : : nd4j_max < int > ( 1 , elementsPerThread ) ;
_threads = nd4j : : math : : nd4j_min < int > ( _threads , omp_get_max_threads ( ) ) ;
PRAGMA_OMP_PARALLEL_FOR_THREADS ( _threads )
for ( int idx = 0 ; idx < n ; idx + + ) {
auto xTadOffsetForBlock = tadOffsets [ indexes [ idx ] ] ;
auto zTadOffsetForBlock = zTadOffsets [ idx ] ;
auto rX = hX + xTadOffsetForBlock ;
auto rZ = hZ + zTadOffsetForBlock ;
if ( xEWS = = 1 & & zEWS = = 1 ) {
PRAGMA_OMP_SIMD
for ( int i = 0 ; i < tadLength ; i + + ) {
rZ [ i ] = rX [ i ] ;
}
} else if ( xEWS > = 1 & & zEWS > = 1 ) {
PRAGMA_OMP_SIMD
for ( int i = 0 ; i < tadLength ; i + + ) {
rZ [ i * zEWS ] = rX [ i * xEWS ] ;
}
}
else {
for ( int i = 0 ; i < tadLength ; i + + ) {
auto xOffset = xTadOffsetForBlock + shape : : getIndexOffset ( i , tadShapeInfo , tadLength ) ;
auto zOffset = zTadOffsetForBlock + shape : : getIndexOffset ( i , zTadShapeInfo , tadLength ) ;
hZ [ zOffset ] = hX [ xOffset ] ;
}
}
}
}
void NativeOps : : pullRows ( Nd4jPointer * extraPointers ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
Nd4jLong n ,
Nd4jLong * indexes ,
Nd4jLong * tadShapeInfo ,
Nd4jLong * tadOffsets ,
Nd4jLong * zTadShapeInfo ,
Nd4jLong * zTadOffsets ) {
auto xType = nd4j : : ArrayOptions : : dataType ( hXShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( xType , pullRowsGeneric , ( hX , hXShapeInfo , hZ , hZShapeInfo , n , indexes , tadShapeInfo , tadOffsets , zTadShapeInfo , zTadOffsets ) , LIBND4J_TYPES ) ;
}
template < typename T >
void tearGeneric ( void * vx ,
Nd4jLong * hXShapeInfo ,
Nd4jPointer * targets ,
Nd4jLong * hZShapeInfo ,
Nd4jLong * tadShapeInfo ,
Nd4jLong * tadOffsets ) {
auto hX = reinterpret_cast < T * > ( vx ) ;
const auto tadLength = shape : : length ( tadShapeInfo ) ;
auto tadEWS = shape : : elementWiseStride ( tadShapeInfo ) ;
auto zEWS = shape : : elementWiseStride ( hZShapeInfo ) ;
auto numTads = shape : : length ( hXShapeInfo ) / tadLength ;
PRAGMA_OMP_PARALLEL_FOR
for ( Nd4jLong i = 0 ; i < numTads ; i + + ) {
auto hZ = reinterpret_cast < T * > ( targets [ i ] ) ;
auto s = hX + tadOffsets [ i ] ;
if ( zEWS = = 1 & & tadEWS = = 1 ) {
PRAGMA_OMP_SIMD
for ( Nd4jLong j = 0 ; j < tadLength ; j + + ) {
hZ [ j ] = s [ j ] ;
}
} else if ( zEWS > 0 & & tadEWS > 0 ) {
PRAGMA_OMP_SIMD
for ( Nd4jLong j = 0 ; j < tadLength ; j + + ) {
hZ [ j * zEWS ] = s [ j * tadEWS ] ;
}
}
else {
for ( Nd4jLong j = 0 ; j < tadLength ; j + + )
hZ [ shape : : getIndexOffset ( j , hZShapeInfo , tadLength ) ] = s [ shape : : getIndexOffset ( j , tadShapeInfo , tadLength ) ] ;
}
}
}
void NativeOps : : tear ( Nd4jPointer * extraPointers ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
Nd4jPointer * targets ,
Nd4jLong * hZShapeInfo ,
Nd4jLong * tadShapeInfo ,
Nd4jLong * tadOffsets ) {
auto xType = nd4j : : ArrayOptions : : dataType ( hXShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( xType , tearGeneric , ( hX , hXShapeInfo , targets , hZShapeInfo , tadShapeInfo , tadOffsets ) , LIBND4J_TYPES ) ;
}
void NativeOps : : average ( Nd4jPointer * extras ,
Nd4jPointer * hX , Nd4jLong * hXShapeInfo ,
Nd4jPointer * dX , Nd4jLong * dXShapeInfo ,
void * z , Nd4jLong * hZShapeInfo ,
void * dz , Nd4jLong * dZShapeInfo ,
int n ,
Nd4jLong length ,
bool propagate ) {
auto xType = nd4j : : ArrayOptions : : dataType ( hXShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( xType , nd4j : : SpecialMethods , : : averageGeneric ( hX , z , hZShapeInfo , n , length , propagate ) , LIBND4J_TYPES ) ;
}
void NativeOps : : accumulate ( Nd4jPointer * extras ,
Nd4jPointer * hX , Nd4jLong * hXShapeInfo ,
Nd4jPointer * dX , Nd4jLong * dXShapeInfo ,
void * hz , Nd4jLong * hZShapeInfo ,
void * dz , Nd4jLong * dZShapeInfo ,
int n ,
Nd4jLong length ) {
auto xType = nd4j : : ArrayOptions : : dataType ( hXShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( xType , nd4j : : SpecialMethods , : : accumulateGeneric ( hX , hz , hZShapeInfo , n , length ) , LIBND4J_TYPES ) ;
}
void NativeOps : : enableP2P ( bool enable ) {
// no-op
}
void NativeOps : : encodeThresholdP1 ( Nd4jPointer * extraPointers , void * hX , Nd4jLong * hXShapeInfo , Nd4jLong N , int * dz , float threshold ) {
// TODO: to be implemented
}
void NativeOps : : encodeThresholdP2Int ( Nd4jPointer * extraPointers , int * hX , Nd4jLong N , int * dz ) {
// TODO: to be implemented
}
void NativeOps : : encodeThresholdP3 ( Nd4jPointer * extraPointers , void * hX , Nd4jLong * hXShapeInfo , int * offsets , Nd4jLong N , int * dz ) {
// offsets won't be used here
// TODO: to be implemented
}
void NativeOps : : decodeThreshold ( Nd4jPointer * extraPointers , void * hX , Nd4jLong N , void * dz , Nd4jLong * hZShapeInfo ) {
// TODO: to be implemented
}
bool NativeOps : : isP2PAvailable ( ) {
// always TRUE for cpu backend
return true ;
}
void NativeOps : : checkP2P ( ) {
// no-op
}
void NativeOps : : decodeBitmap ( Nd4jPointer * extraPointers , void * hX , Nd4jLong N , void * dz , Nd4jLong * hZShapeInfo ) {
NativeOpExecutioner : : decodeBitmap ( hX , N , dz , hZShapeInfo ) ;
}
template < typename T >
void shuffleGeneric ( void * * hX , Nd4jLong * * hXShapeInfo , void * * dz , Nd4jLong * * hZShapeInfo , int N , int * shuffleMap , Nd4jLong * * tadOnlyShapeInfo , Nd4jLong * * tadOffsets ) {
auto dX = reinterpret_cast < T * * > ( hX ) ;
auto dZ = reinterpret_cast < T * * > ( dz ) ;
PRAGMA_OMP_PARALLEL_FOR_SIMD_THREADS ( N )
for ( int f = 0 ; f < N ; f + + ) {
auto hX = reinterpret_cast < T * > ( dX [ f ] ) ;
//auto hZ = reinterpret_cast<T *>(dZ[f]);
auto xShapeInfo = hXShapeInfo [ f ] ;
auto tadOffset = reinterpret_cast < Nd4jLong * > ( tadOffsets [ f ] ) ;
const auto tadLength = shape : : length ( tadOnlyShapeInfo [ f ] ) ;
auto tadEWS = shape : : elementWiseStride ( tadOnlyShapeInfo [ f ] ) ;
auto tadRank = shape : : rank ( tadOnlyShapeInfo [ f ] ) ;
auto numTads = shape : : length ( hXShapeInfo [ f ] ) / tadLength ;
auto tadShape = shape : : shapeOf ( tadOnlyShapeInfo [ f ] ) ;
auto tadStride = shape : : stride ( tadOnlyShapeInfo [ f ] ) ;
if ( shape : : rank ( xShapeInfo ) = = 1 ) {
auto xLength = shape : : length ( xShapeInfo ) ;
auto ews = shape : : elementWiseStride ( xShapeInfo ) ;
for ( Nd4jLong r = 0 ; r < xLength ; r + + ) {
auto swapIdx = shuffleMap [ r ] ;
if ( swapIdx < 0 )
continue ;
nd4j : : math : : nd4j_swap < T > ( hX [ r * ews ] , hX [ swapIdx * ews ] ) ;
}
} else {
for ( Nd4jLong r = 0 ; r < numTads ; r + + ) {
if ( shuffleMap [ r ] < 0 )
continue ;
auto oldOffset = tadOffset [ r ] ;
auto newOffset = tadOffset [ shuffleMap [ r ] ] ;
auto rX = hX + oldOffset ;
auto rY = hX + newOffset ;
if ( tadEWS = = 1 ) {
for ( Nd4jLong i = 0 ; i < tadLength ; i + + ) {
nd4j : : math : : nd4j_swap < T > ( rX [ i ] , rY [ i ] ) ;
}
} else {
for ( Nd4jLong i = 0 ; i < tadLength ; i + + ) {
auto offset = shape : : getIndexOffset ( i , tadOnlyShapeInfo [ f ] , tadLength ) ;
nd4j : : math : : nd4j_swap < T > ( hX [ offset + oldOffset ] , hX [ offset + newOffset ] ) ;
}
}
}
}
}
}
void NativeOps : : shuffle ( Nd4jPointer * extras ,
Nd4jPointer * hX , Nd4jPointer * hXShapeInfo ,
Nd4jPointer * dX , Nd4jPointer * dXShapeInfo ,
Nd4jPointer * hz , Nd4jPointer * hZShapeInfo ,
Nd4jPointer * dz , Nd4jPointer * dZShapeInfo ,
int N ,
int * shuffleMap ,
Nd4jPointer * tadShapeInfo ,
Nd4jPointer * tadOffsets ) {
auto xShape = reinterpret_cast < Nd4jLong * * > ( hXShapeInfo ) ;
auto zShape = reinterpret_cast < Nd4jLong * * > ( hZShapeInfo ) ;
auto tadOnlyShapeInfo = reinterpret_cast < Nd4jLong * * > ( tadShapeInfo ) ;
auto tadOffset = reinterpret_cast < Nd4jLong * * > ( tadOffsets ) ;
auto xType = nd4j : : ArrayOptions : : dataType ( xShape [ 0 ] ) ;
BUILD_SINGLE_SELECTOR ( xType , shuffleGeneric , ( hX , xShape , hz , zShape , N , shuffleMap , tadOnlyShapeInfo , tadOffset ) , LIBND4J_TYPES ) ;
}
bool NativeOps : : isExperimentalEnabled ( ) {
return nd4j : : Environment : : getInstance ( ) - > isExperimentalBuild ( ) ;
}
void NativeOps : : setOmpMinThreads ( int threads ) {
// TODO: to be implemented
}
/*
void NativeOps : : execMetaPredicateShape ( Nd4jPointer * extras ,
const int opTypeA ,
const int opNumA ,
const int opTypeB ,
const int opNumB ,
Nd4jLong N ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraA ,
void * extraB ,
double scalarA ,
double scalarB ) {
// no-op;
}
*/
int NativeOps : : getDevice ( ) {
return 0 ;
}
void NativeOps : : execScalar ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hScalars , Nd4jLong * hScalarShapeInfo ,
void * dScalars , Nd4jLong * dScalarShapeInfo ,
void * extraParams ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ,
Nd4jLong * tadShapeInfo , Nd4jLong * tadOffsets ,
Nd4jLong * tadShapeInfoZ , Nd4jLong * tadOffsetsZ ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
NativeOpExecutioner : : execScalar ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
hScalars ,
hScalarShapeInfo ,
dScalars ,
dScalarShapeInfo ,
dimension ,
shape : : length ( hDimensionShape ) ,
tadShapeInfo ,
tadOffsets ,
tadShapeInfoZ ,
tadOffsetsZ ) ;
}
void NativeOps : : execScalarBool ( Nd4jPointer * extraPointers ,
int opNum ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * hScalars , Nd4jLong * hScalarShapeInfo ,
void * dScalars , Nd4jLong * dScalarShapeInfo ,
void * extraParams ,
void * hDimension , Nd4jLong * hDimensionShape ,
void * dDimension , Nd4jLong * dDimensionShape ,
Nd4jLong * tadShapeInfo , Nd4jLong * tadOffsets ,
Nd4jLong * tadShapeInfoZ , Nd4jLong * tadOffsetsZ ) {
auto dimension = reinterpret_cast < int * > ( hDimension ) ;
int dimensionLength = static_cast < int > ( shape : : length ( hDimensionShape ) ) ;
NativeOpExecutioner : : execScalarBool ( nullptr ,
opNum ,
hX ,
hXShapeInfo ,
dX ,
dXShapeInfo ,
extraParams ,
hZ ,
hZShapeInfo ,
dZ ,
dZShapeInfo ,
hScalars ,
hScalarShapeInfo ,
dScalars ,
dScalarShapeInfo ,
dimension ,
dimensionLength ,
tadShapeInfo ,
tadOffsets ,
tadShapeInfoZ ,
tadOffsetsZ ) ;
}
const char * NativeOps : : getDeviceName ( int deviceId ) {
if ( ! nameSet ) {
name = reinterpret_cast < char * > ( malloc ( 256 * sizeof ( char ) ) ) ;
CHECK_ALLOC ( name , " Failed to allocate new string buffer " , 256 ) ;
std : : memset ( name , 0 , 256 * sizeof ( char ) ) ;
nameSet = true ;
// TODO: provide proper CPU model name here
sprintf ( name , " x86-compatible CPU " ) ;
}
return name ;
}
void NativeOps : : execAggregate ( Nd4jPointer * extraPointers , int opNum ,
void * * arguments ,
int numArguments ,
Nd4jLong * * shapeArguments ,
int numShapeArguments ,
int * indexArguments ,
int numIndexArguments ,
int * * intArrays ,
int numIntArrays ,
void * realArguments ,
int numRealArguments ,
nd4j : : DataType dtype ) {
BUILD_SINGLE_SELECTOR ( dtype , NativeOpExecutioner : : execAggregate , ( nullptr , opNum , arguments , numArguments , shapeArguments , numShapeArguments , indexArguments , numIndexArguments , intArrays , numIntArrays , realArguments , numRealArguments ) , FLOAT_TYPES ) ;
}
template < typename T >
void NativeOps : : _batchExecutor ( Nd4jPointer * extraPointers ,
int numAggregates ,
int opNum ,
int maxArgs ,
int maxShapes ,
int maxIntArrays ,
int maxIntArraySize ,
int maxIdx ,
int maxReals ,
void * ptrToArguments ,
nd4j : : DataType dtype ) {
// probably, we don't want too much threads as usually
int _threads = nd4j : : math : : nd4j_min < int > ( numAggregates , omp_get_max_threads ( ) ) ;
nd4j : : PointersHelper < T > helper ( ptrToArguments ,
numAggregates ,
maxArgs ,
maxShapes ,
maxIntArrays ,
maxIntArraySize ,
maxIdx ,
maxReals ) ;
// special case here, we prefer spread arrangement here, all threads are detached from each other
PRAGMA_OMP_PARALLEL_FOR_THREADS ( _threads )
for ( int i = 0 ; i < numAggregates ; i + + ) {
auto intArrays = new int * [ maxIntArrays ] ;
auto arguments = helper . getArguments ( i ) ;
auto shapes = helper . getShapeArguments ( i ) ;
auto idxArg = helper . getIndexArguments ( i ) ;
auto realArg = helper . getRealArguments ( i ) ;
for ( int e = 0 ; e < maxIntArrays ; e + + ) {
intArrays [ e ] = helper . getIntArrayArguments ( i , e ) ;
}
execAggregate ( extraPointers ,
opNum ,
reinterpret_cast < void * * > ( arguments ) ,
helper . getNumArguments ( i ) ,
shapes ,
helper . getNumShapeArguments ( i ) ,
idxArg ,
helper . getNumIndexArguments ( i ) ,
intArrays ,
helper . getNumIntArrayArguments ( i ) ,
realArg ,
helper . getNumRealArguments ( i ) ,
dtype ) ;
delete [ ] intArrays ;
}
}
BUILD_SINGLE_TEMPLATE ( template void NativeOps : : _batchExecutor , ( Nd4jPointer * extraPointers , int numAggregates , int opNum , int maxArgs , int maxShapes , int maxIntArrays , int maxIntArraySize , int maxIdx , int maxReals , void * ptrToArguments , nd4j : : DataType dtype ) , FLOAT_TYPES ) ;
void NativeOps : : execAggregateBatch ( Nd4jPointer * extraPointers ,
int numAggregates ,
int opNum ,
int maxArgs ,
int maxShapes ,
int maxIntArrays ,
int maxIntArraySize ,
int maxIdx ,
int maxReals ,
void * ptrToArguments ,
nd4j : : DataType dtype ) {
BUILD_SINGLE_SELECTOR ( dtype , _batchExecutor , ( extraPointers , numAggregates , opNum , maxArgs , maxShapes , maxIntArrays , maxIntArraySize , maxIdx , maxReals , ptrToArguments , dtype ) , FLOAT_TYPES ) ;
}
void NativeOps : : execRandom ( Nd4jPointer * extraPointers ,
int opNum ,
Nd4jPointer state ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraArguments ) {
NativeOpExecutioner : : execRandom ( nullptr , opNum , state , hZ , hZShapeInfo , dZ , dZShapeInfo , extraArguments ) ;
}
void NativeOps : : execRandom ( Nd4jPointer * extraPointers ,
int opNum ,
Nd4jPointer state ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hY , Nd4jLong * hYShapeInfo ,
void * dY , Nd4jLong * dYShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraArguments ) {
NativeOpExecutioner : : execRandom ( nullptr , opNum , state , hX , hXShapeInfo , dX , dXShapeInfo , hY , hYShapeInfo , dY , dYShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo , extraArguments ) ;
}
void NativeOps : : execRandom ( Nd4jPointer * extraPointers ,
int opNum ,
Nd4jPointer state ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
void * hZ , Nd4jLong * hZShapeInfo ,
void * dZ , Nd4jLong * dZShapeInfo ,
void * extraArguments ) {
NativeOpExecutioner : : execRandom ( nullptr , opNum , state , hX , hXShapeInfo , dX , dXShapeInfo , hZ , hZShapeInfo , dZ , dZShapeInfo , extraArguments ) ;
}
Nd4jPointer NativeOps : : initRandom ( Nd4jPointer * extraPointers , long seed , long bufferSize , Nd4jPointer ptrToBuffer ) {
auto ptrBuf = reinterpret_cast < long * > ( ptrToBuffer ) ;
auto buffer = new nd4j : : random : : RandomBuffer ( seed , bufferSize , reinterpret_cast < uint64_t * > ( ptrBuf ) ) ;
nd4j : : random : : Xoroshiro128 generator ( buffer ) ;
generator . refreshBuffer ( ) ;
return ( Nd4jPointer ) buffer ;
}
void NativeOps : : refreshBuffer ( Nd4jPointer * extraPointers , long seed , Nd4jPointer ptrRandom ) {
auto buffer = reinterpret_cast < nd4j : : random : : RandomBuffer * > ( ptrRandom ) ;
buffer - > setSeed ( seed ) ;
buffer - > setOffset ( 0 ) ;
nd4j : : random : : Xoroshiro128 generator ( buffer ) ;
generator . refreshBuffer ( ) ;
}
void NativeOps : : reSeedBuffer ( Nd4jPointer * extraPointers , long seed , Nd4jPointer ptrRandom ) {
auto buffer = reinterpret_cast < nd4j : : random : : RandomBuffer * > ( ptrRandom ) ;
buffer - > reSeed ( seed ) ;
}
void NativeOps : : destroyRandom ( Nd4jPointer ptrBuffer ) {
auto buffer = reinterpret_cast < nd4j : : random : : RandomBuffer * > ( ptrBuffer ) ;
delete buffer ;
}
/**
* Return the length of a shape buffer
* based on the pointer
* @ param buffer the buffer pointer to check
* @ return
*/
int NativeOps : : lengthForShapeBufferPointer ( Nd4jPointer buffer ) {
auto shapeBuffer = reinterpret_cast < Nd4jLong * > ( buffer ) ;
return shape : : shapeInfoLength ( shape : : rank ( shapeBuffer ) ) ;
}
/**
* The pointer to get the address for
*
* @ param address the address to get the pointer
* @ return the pointer for the given address
*/
Nd4jPointer NativeOps : : pointerForAddress ( Nd4jLong address ) {
return reinterpret_cast < Nd4jPointer > ( address ) ;
}
void NativeOps : : sort ( Nd4jPointer * extraPointers ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
bool descending ) {
NativeOpExecutioner : : execSort ( hX , hXShapeInfo , descending ) ;
}
void NativeOps : : sortTad ( Nd4jPointer * extraPointers ,
void * hX , Nd4jLong * hXShapeInfo ,
void * dX , Nd4jLong * dXShapeInfo ,
int * dimension ,
int dimensionLength ,
Nd4jLong * tadShapeInfo ,
Nd4jLong * tadOffsets ,
bool descending ) {
NativeOpExecutioner : : execSort ( hX , hXShapeInfo , dimension , dimensionLength , tadShapeInfo , tadOffsets , descending ) ;
}
void NativeOps : : sortCooIndices ( Nd4jPointer * extraPointers ,
Nd4jLong * indices ,
void * values ,
Nd4jLong length ,
int rank ) {
NativeOpExecutioner : : execSortCooIndices ( indices , values , length , rank ) ;
}
Nd4jLong NativeOps : : encodeBitmap ( Nd4jPointer * extraPointers , void * hX , Nd4jLong * hXShapeInfo , Nd4jLong N , int * dz , float threshold ) {
return NativeOpExecutioner : : encodeBitmap ( hX , hXShapeInfo , N , dz , threshold ) ;
}
Nd4jLong * NativeOps : : mmapFile ( Nd4jPointer * extraPointers , const char * fileName , Nd4jLong length ) {
auto hZ = new Nd4jLong [ 2 ] ; errno = 0 ;
# if defined(_WIN32) || defined(_WIN64)
_mmap ( hZ , static_cast < size_t > ( length ) , fileName ) ;
# else
int fd = open ( fileName , O_RDWR , 0 ) ; // checking for failed fopen
if ( fd < 0 ) {
nd4j_printf ( " Errno: %i \n " , errno ) ;
throw std : : runtime_error ( " Failed to open file for MMAP " ) ;
}
void * ptr = mmap ( NULL , length , PROT_READ | PROT_WRITE , MAP_SHARED , fd , 0 ) ;
// check for failed allocation
if ( ptr = = MAP_FAILED )
return nullptr ;
hZ [ 0 ] = ( Nd4jLong ) ptr ;
hZ [ 1 ] = fd ;
# endif
return hZ ;
}
void NativeOps : : munmapFile ( Nd4jPointer * extraPointers , Nd4jLong * ptrMap , Nd4jLong length ) {
munmap ( ( Nd4jPointer ) ptrMap [ 0 ] , length ) ;
# if defined(_WIN32) || defined(_WIN64)
CloseHandle ( reinterpret_cast < HANDLE > ( ptrMap [ 1 ] ) ) ;
# else
close ( ( int ) ptrMap [ 1 ] ) ;
# endif
delete [ ] ptrMap ;
}
nd4j : : graph : : ResultWrapper * NativeOps : : executeFlatGraph ( Nd4jPointer * extraPointers , Nd4jPointer flatBufferPointer ) {
return nd4j : : graph : : GraphExecutioner : : executeFlatBuffer ( flatBufferPointer ) ;
}
const char * NativeOps : : getAllCustomOps ( ) {
return nd4j : : ops : : OpRegistrator : : getInstance ( ) - > getAllCustomOperations ( ) ;
}
template < typename T >
FORCEINLINE int estimateThresholdGeneric ( Nd4jPointer * extraPointers , Nd4jPointer hX , int N , T threshold ) {
auto buffer = reinterpret_cast < T * > ( hX ) ;
int span = ( N / 6 ) + 8 ;
int cnt = 0 ;
PRAGMA_OMP_PARALLEL_REDUCTION ( + : cnt )
{
int tid = omp_get_thread_num ( ) ;
int start = span * tid ;
int stop = span * ( tid + 1 ) ;
if ( stop > N )
stop = N ;
PRAGMA_OMP_SIMD
for ( int e = start ; e < stop ; e + + ) {
auto v = nd4j : : math : : nd4j_abs < T > ( buffer [ e ] ) ;
if ( v > = threshold )
cnt + + ;
}
}
return cnt ;
}
int NativeOps : : estimateThreshold ( Nd4jPointer * extraPointers , Nd4jPointer hX , Nd4jLong * hXShapeInfo , int N , float threshold ) {
auto xType = ArrayOptions : : dataType ( hXShapeInfo ) ;
BUILD_SINGLE_SELECTOR ( xType , return estimateThresholdGeneric , ( extraPointers , hX , N , threshold ) , FLOAT_TYPES ) ;
}
void NativeOps : : deleteShapeList ( Nd4jPointer shapeList ) {
auto list = reinterpret_cast < nd4j : : ShapeList * > ( shapeList ) ;
//list->destroy();
delete list ;
}
nd4j : : ShapeList * _calculateOutputShapes ( Nd4jPointer * extraPointers , nd4j : : ops : : DeclarableOp * op , Nd4jPointer * inputBuffers , Nd4jPointer * inputShapes , int numInputShapes , double * tArgs , int numTArgs , Nd4jLong * iArgs , int numIArgs , bool * bArgs , int numBArgs ) {
nd4j : : graph : : VariableSpace varSpace ;
Context block ( 2 , & varSpace ) ;
nd4j : : ShapeList inShapes ;
for ( int e = 0 ; e < numIArgs ; e + + )
block . getIArguments ( ) - > push_back ( iArgs [ e ] ) ;
for ( int e = 0 ; e < numTArgs ; e + + )
block . getTArguments ( ) - > push_back ( tArgs [ e ] ) ;
for ( int e = 0 ; e < numBArgs ; e + + )
block . getBArguments ( ) - > push_back ( bArgs [ e ] ) ;
for ( int e = 0 ; e < numInputShapes ; e + + ) {
auto shape_ = reinterpret_cast < Nd4jLong * > ( inputShapes [ e ] ) ;
// we shouldn't copy buffer if that's empty array
void * buffer_ = nd4j : : ArrayOptions : : arrayType ( shape_ ) = = ArrayType : : EMPTY ? nullptr : inputBuffers [ e ] ;
auto array = new nd4j : : NDArray ( buffer_ , shape_ , varSpace . launchContext ( ) , false ) ;
// block should contain references to proper variable
varSpace . putVariable ( 1 , e , array ) ;
block . pickInput ( 1 , e ) ;
inShapes . push_back ( shape_ ) ;
}
auto status = op - > validateDataTypes ( block ) ;
if ( status ! = Status : : OK ( ) )
throw std : : runtime_error ( " Data types validation failed " ) ;
auto shapeList = op - > calculateOutputShape ( & inShapes , block ) ;
if ( varSpace . launchContext ( ) ! = nullptr )
shapeList - > detach ( ) ;
return shapeList ;
}
nd4j : : ShapeList * NativeOps : : calculateOutputShapes ( Nd4jPointer * extraPointers , Nd4jLong hash , Nd4jPointer * inputBuffers , Nd4jPointer * inputShapes , int numInputShapes , double * tArgs , int numTArgs , Nd4jLong * iArgs , int numIArgs , bool * bArgs , int numBArgs ) {
auto op = nd4j : : ops : : OpRegistrator : : getInstance ( ) - > getOperation ( hash ) ;
return _calculateOutputShapes ( extraPointers , op , inputBuffers , inputShapes , numInputShapes , tArgs , numTArgs , iArgs , numIArgs , bArgs , numBArgs ) ;
}
nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer *extraPointers, nd4j::ops::DeclarableOp *op, Nd4jPointer *inputShapes, int numInputShapes, double *tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) {
    Context block(1);
    nd4j::ShapeList inShapes;

    for (int e = 0; e < numIArgs; e++)
        block.getIArguments()->push_back(iArgs[e]);

    for (int e = 0; e < numTArgs; e++)
        block.getTArguments()->push_back(tArgs[e]);

    for (int e = 0; e < numInputShapes; e++)
        inShapes.push_back(reinterpret_cast<Nd4jLong *>(inputShapes[e]));

    auto shapeList = op->calculateOutputShape(&inShapes, block);
    shapeList->detach();

    return shapeList;
}
nd4j::ShapeList* NativeOps::calculateOutputShapes(Nd4jPointer *extraPointers, Nd4jLong hash, Nd4jPointer *inputShapes, int numInputShapes, double *tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) {
    auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);

    return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs);
}
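
/*
 * Sketch of a hypothetical caller (names are illustrative; `opHash` is assumed
 * to be the hash of a registered op, typically supplied by the JVM side):
 *
 *   Nd4jPointer inputShapes[] = { (Nd4jPointer) xShapeInfo, (Nd4jPointer) yShapeInfo };
 *   auto shapes = nativeOps.calculateOutputShapes(nullptr, opHash, inputShapes, 2,
 *                                                 nullptr, 0, nullptr, 0);
 *   // ... inspect shapes->at(0) ...
 *   nativeOps.deleteShapeList((Nd4jPointer) shapes);
 */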
int NativeOps::execCustomOp(Nd4jPointer *extraPointers, Nd4jLong hash, Nd4jPointer opContext) {
    auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);
    auto context = reinterpret_cast<Context *>(opContext);

    return op->execute(context);
}
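
/**
 * Shared body of execCustomOp: wraps the raw host buffers into temporary
 * NDArrays, zeroes every output buffer that doesn't alias an input, executes
 * the op, and re-orders outputs to the caller-requested ordering if needed.
 */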
Nd4jStatus realExec(nd4j::ops::DeclarableOp *op, Nd4jPointer *extraPointers, Nd4jLong hash, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs, double *tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, bool isInplace) {
    if (op == nullptr)
        nd4j_printf("Can't find requested operation: [%lld]\n", hash);

    // we're using the same fake nodeId everywhere here

    std::vector<nd4j::NDArray *> inputs(numInputs);
    std::vector<nd4j::NDArray *> outputs(numOutputs);
    std::vector<double> ttArgs(numTArgs);
    std::vector<Nd4jLong> iiArgs(numIArgs);
    std::vector<bool> biArgs(numBArgs);

    // filling the block with inputs
    for (int e = 0; e < numInputs; e++) {
        auto shape = reinterpret_cast<Nd4jLong *>(inputShapes[e]);
        void *buffer = nd4j::ArrayOptions::arrayType(shape) == ArrayType::EMPTY ? nullptr : inputBuffers[e];

        inputs[e] = new nd4j::NDArray(buffer, shape);
    }

    // if not inplace - transferring output arrays
    if (!isInplace)
        for (int e = 0; e < numOutputs; e++) {
            // we want to keep the original output shape intact
            auto shape = shape::copyShape(reinterpret_cast<Nd4jLong *>(outputShapes[e]));
            void *buffer = nd4j::ArrayOptions::arrayType(shape) == ArrayType::EMPTY ? nullptr : outputBuffers[e];

            // only memset the output buffer if it's non-null and doesn't alias any input buffer
            bool canNullify = buffer != nullptr;
            for (int i = 0; i < numInputs; i++) {
                if (inputBuffers[i] == buffer) {
                    canNullify = false;
                    break;
                }
            }

            if (canNullify)
                memset((uint8_t *) buffer, '\0', shape::length(shape) * DataTypeUtils::sizeOfElement(ArrayOptions::dataType(shape)));

            auto array = new nd4j::NDArray(buffer, shape);
            outputs[e] = array;

            // and we want to release the shape copy once we're done
            delete[] shape;
        }

    for (int e = 0; e < numIArgs; e++)
        iiArgs[e] = iArgs[e];

    for (int e = 0; e < numTArgs; e++)
        ttArgs[e] = tArgs[e];

    for (int e = 0; e < numBArgs; e++)
        biArgs[e] = bArgs[e];

    // at this point everything is filled in, so we can execute
    auto hZ = op->execute(inputs, outputs, ttArgs, iiArgs, biArgs, isInplace);
    //auto hZ = op->execute(inputs, ttArgs, iiArgs, isInplace);

    if (!isInplace)
        for (int e = 0; e < numOutputs; e++) {
            //shape::printShapeInfoLinear("JVM output shape", (int *) outputShapes[e]);
            //shape::printShapeInfoLinear("C++ output shape", (int *) outputs[e]->shapeInfo());
            //outputs[e]->printIndexedBuffer("C++ raw output");
            //outputs[e]->printBuffer("C++ indexed output");

            if (outputs[e]->ordering() != shape::order(reinterpret_cast<Nd4jLong *>(outputShapes[e])))
                outputs[e]->streamline(shape::order(reinterpret_cast<Nd4jLong *>(outputShapes[e])));
        }

    /*
    if (!isInplace) {
        if (hZ->size() != numOutputs) {
            return ND4J_STATUS_BAD_OUTPUT;
        }

        for (int e = 0; e < numOutputs; e++) {
            auto buffer = (T *) outputBuffers[e];
            auto shape = (int *) outputShapes[e];
            nd4j::NDArray<T> tmp(buffer, shape);

            if (tmp.lengthOf() != hZ->at(e)->lengthOf()) {
                nd4j_printf("Provided output array for [%s] has length of %i, but actual hZ has length of %i\n", op->getOpName()->c_str(), tmp.lengthOf(), hZ->at(e)->lengthOf());
                return ND4J_STATUS_BAD_OUTPUT;
            }

            tmp.assign(hZ->at(e));
        }
    } else {
        // if op is inplace, our ResultSet holds pointers
        hZ->purge();
    }

    delete hZ;
    */

    for (auto v: inputs)
        delete v;

    for (auto v: outputs)
        delete v;

    return hZ;
}
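
/**
 * Looks up a custom op by its hash in the OpRegistrator and executes it
 * against the provided host buffers via realExec() above.
 */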
int NativeOps::execCustomOp(Nd4jPointer *extraPointers, Nd4jLong hash, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs, double *tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, bool isInplace) {
    auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);

    return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace);
}
int NativeOps::registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) {
    auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer);

    nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph);

    return ND4J_STATUS_OK;
}
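
/*
 * Typical graph round-trip from the caller's side (a sketch; the graph id and
 * buffer names are illustrative only):
 *
 *   nativeOps.registerGraph(nullptr, 119, flatBufferPointer);
 *   auto results = nativeOps.executeStoredGraph(nullptr, 119, inputBuffers,
 *                                               inputShapes, inputIndices, numInputs);
 *   // ... read results ...
 *   nativeOps.deleteVariablesSet((Nd4jPointer) results);
 *   nativeOps.unregisterGraph(nullptr, 119);
 */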
static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int *inputIndices, int numInputs) {
    auto graph = nd4j::graph::GraphHolder::getInstance()->cloneGraph(graphId);
    auto varSpace = graph->getVariableSpace();

    std::vector<nd4j::NDArray *> handles;

    for (int e = 0; e < numInputs; e++) {
        auto idx = inputIndices[e];

        // we'll delete this array later, together with the cloned VariableSpace
        auto array = new nd4j::NDArray(inputBuffers[e], reinterpret_cast<Nd4jLong *>(inputShapes[e]));
        handles.emplace_back(array);

        if (varSpace->hasVariable(idx)) {
            auto var = varSpace->getVariable(idx);
            if (var->hasNDArray())
                delete var->getNDArray();

            var->setNDArray(array);
        } else
            varSpace->putVariable(idx, array);
    }

    auto hZ = nd4j::graph::GraphExecutioner::execute(graph, varSpace);
    auto varSet = new nd4j::graph::VariablesSet(hZ);

    if (hZ == ND4J_STATUS_OK) {
        // pull back results, and provide them to the caller
        auto outputs = graph->fetchOutputs();
        for (int e = 0; e < outputs->size(); e++) {
            // we're only taking the variable ID/index from the original graph; values come from the cloned workspace
            std::pair<int, int> varId(outputs->at(e)->id(), outputs->at(e)->index());

            auto var = varSpace->getVariable(varId);

            varSet->push_back(var->clone());
        }

        delete outputs;
    }

    delete graph;

    return varSet;
}
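
/**
 * Executes a graph previously stored via registerGraph() against the given
 * inputs; the returned VariablesSet should be released with deleteVariablesSet().
 */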
nd4j::graph::VariablesSet* NativeOps::executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int *inputIndices, int numInputs) {
    return executeStoredGraphT(extraPointers, graphId, inputBuffers, inputShapes, inputIndices, numInputs);
}
int NativeOps::unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) {
    nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId);

    return nd4j::Status::OK();
}

void NativeOps::deletePointerArray(Nd4jPointer pointer) {
    auto ptr = reinterpret_cast<Nd4jPointer *>(pointer);
    delete[] ptr;
}

void NativeOps::deleteIntArray(Nd4jPointer pointer) {
    auto ptr = reinterpret_cast<int *>(pointer);
    delete[] ptr;
}

void NativeOps::deleteLongArray(Nd4jPointer pointer) {
    auto ptr = reinterpret_cast<Nd4jLong *>(pointer);
    delete[] ptr;
}
template <typename T>
static void deleteVariablesSetT(Nd4jPointer pointer) {
    auto ptr = reinterpret_cast<nd4j::graph::VariablesSet *>(pointer);
    delete ptr;
}

void NativeOps::deleteVariablesSet(Nd4jPointer pointer) {
    deleteVariablesSetT<double>(pointer);
}
const char* NativeOps::getAllOperations() {
    return nd4j::OpTracker::getInstance()->exportOperations();
}

Nd4jPointer NativeOps::getGraphState(Nd4jLong id) {
    return (Nd4jPointer) new nd4j::graph::GraphState(id);
}

void NativeOps::deleteGraphState(Nd4jPointer state) {
    auto stateP = reinterpret_cast<nd4j::graph::GraphState *>(state);
    delete stateP;
}
Nd4jStatus execCustomOpWithScope_(Nd4jPointer *extraPointers, nd4j::graph::GraphState *state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) {
    /**
     * That's basically exec, with the VariableSpace provided in GraphState:
     * depending on the operation (i.e. while or if), different logic executors could be used
     */

    auto graph = state->graph();
    auto varSpace = state->variableSpace();

    // this Node is created dynamically and has nothing beyond it: only inputs and outputs
    // it has an id of 0, and its inputs are mapped into the VariableSpace below
    Node node(OpType_LOGIC, opHash, 0);

    // mapping inputs
    for (int e = 0; e < numInputs; e++) {
        auto buffer = inputBuffers[e];
        auto shapeInfo = reinterpret_cast<Nd4jLong *>(inputShapes[e]);

        auto array = new nd4j::NDArray(buffer, shapeInfo, varSpace->launchContext());

        // now we just put the array into the VariableSpace
        varSpace->putVariable(0, e, array);
        node.pickInput(0, e);
    }

    // mapping scopes
    for (int e = 0; e < numScopes; e++) {
        // we should check scope existence in GraphState/Graph
        int scopeId = (int) scopes[e];
        if (!state->hasScope(scopeId)) {
            // nd4j_printf("execCustomOpWithScope: referenced scope [%i] doesn't exist\n", scopeId);
            return Status::THROW();
        }
        node.pickInput(scopeId, 0);
    }

    auto hZ = LogicExecutor::processNode(graph, &node);
    if (hZ != Status::OK())
        return hZ;

    // mapping outputs: copy each result variable (same ID) into the caller's output buffer
    for (int e = 0; e < numOutputs; e++) {
        auto buffer = outputBuffers[e];
        auto shapeInfo = reinterpret_cast<Nd4jLong *>(outputShapes[e]);

        NDArray array(buffer, shapeInfo, varSpace->launchContext());

        //varSpace->putVariable(0, e, array);
        auto t = varSpace->getVariable(0, e)->getNDArray();
        array.assign(t);
    }

    // removing input variables
    for (int e = 0; e < numInputs; e++) {
        varSpace->dropVariable(0, e);
    }

    // at this point the Graph and Node for the current op have been processed
    return Status::OK();
}

Nd4jStatus NativeOps::execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) {
    return execCustomOpWithScope_(extraPointers, reinterpret_cast<nd4j::graph::GraphState *>(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs);
}
void NativeOps::deleteResultWrapper(Nd4jPointer ptr) {
    auto p = reinterpret_cast<nd4j::graph::ResultWrapper *>(ptr);
    delete p;
}
/*
 * TypeDef:
 *     void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer hX, long N, int dstType, Nd4jPointer hZ);
 */
void NativeOps::convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer hX, Nd4jLong N, int dstType, Nd4jPointer hZ) {
    auto hx = reinterpret_cast<void *>(hX);
    auto hz = reinterpret_cast<void *>(hZ);

    if (srcType == ND4J_FLOAT8) {
        if (dstType == ND4J_FLOAT8) {
            // convertGeneric<double, nd4j::float8>(hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, nd4j::int8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, nd4j::uint8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, nd4j::int16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, nd4j::uint16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // not supported
        } else if (dstType == ND4J_FLOAT32) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            //nd4j::TypeCast::convertGeneric<nd4j::float8, double>(nullptr, hx, N, hz);
        } else {
            //nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_INT8) {
        if (dstType == ND4J_FLOAT8) {
            //nd4j::TypeCast::convertGeneric<nd4j::int8, nd4j::float8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            //convertGeneric<nd4j::int8, nd4j::int8>(hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            nd4j::TypeCast::convertGeneric<int8_t, uint8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertGeneric<int8_t, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            nd4j::TypeCast::convertGeneric<int8_t, int16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<int8_t, uint16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // TODO: eventually we might want to add it
        } else if (dstType == ND4J_FLOAT32) {
            nd4j::TypeCast::convertGeneric<int8_t, float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            nd4j::TypeCast::convertGeneric<int8_t, double>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_UINT8) {
        if (dstType == ND4J_FLOAT8) {
            //nd4j::TypeCast::convertGeneric<uint8_t, nd4j::float8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            nd4j::TypeCast::convertGeneric<uint8_t, int8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            nd4j::TypeCast::convertGeneric<uint8_t, uint8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertGeneric<uint8_t, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            nd4j::TypeCast::convertGeneric<uint8_t, int16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<uint8_t, uint16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // TODO: still might want to add
        } else if (dstType == ND4J_FLOAT32) {
            nd4j::TypeCast::convertGeneric<uint8_t, float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            nd4j::TypeCast::convertGeneric<uint8_t, double>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_FLOAT16) {
        if (dstType == ND4J_FLOAT8) {
            //nd4j::TypeCast::convertGeneric<float16, nd4j::float8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            nd4j::TypeCast::convertGeneric<float16, int8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            nd4j::TypeCast::convertGeneric<float16, uint8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertGeneric<float16, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            nd4j::TypeCast::convertGeneric<float16, int16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<float16, uint16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // TODO: not supported yet
        } else if (dstType == ND4J_FLOAT32) {
            nd4j::TypeCast::convertGeneric<float16, float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            nd4j::TypeCast::convertGeneric<float16, double>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_THRESHOLD) {
            nd4j::TypeCast::convertToThreshold<float16>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_INT16) {
        if (dstType == ND4J_FLOAT8) {
            //nd4j::TypeCast::convertGeneric<int16_t, nd4j::float8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            nd4j::TypeCast::convertGeneric<int16_t, int8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            nd4j::TypeCast::convertGeneric<int16_t, uint8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertGeneric<int16_t, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            //nd4j::TypeCast::convertGeneric<int16_t, int16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<int16_t, uint16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // TODO: not supported yet
        } else if (dstType == ND4J_FLOAT32) {
            nd4j::TypeCast::convertGeneric<int16_t, float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            nd4j::TypeCast::convertGeneric<int16_t, double>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_FLOAT24) {
        // not supported
    } else if (srcType == ND4J_FLOAT32) {
        if (dstType == ND4J_FLOAT8) {
            //nd4j::TypeCast::convertGeneric<float, nd4j::float8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            nd4j::TypeCast::convertGeneric<float, int8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            nd4j::TypeCast::convertGeneric<float, uint8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertGeneric<float, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            nd4j::TypeCast::convertGeneric<float, int16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<float, uint16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // not supported
        } else if (dstType == ND4J_DOUBLE) {
            nd4j::TypeCast::convertGeneric<float, double>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_THRESHOLD) {
            nd4j::TypeCast::convertToThreshold<float>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_DOUBLE) {
        if (dstType == ND4J_FLOAT8) {
            //nd4j::TypeCast::convertGeneric<double, nd4j::float8>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT8) {
            nd4j::TypeCast::convertGeneric<double, int8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT8) {
            nd4j::TypeCast::convertGeneric<double, uint8_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertGeneric<double, float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_INT16) {
            nd4j::TypeCast::convertGeneric<double, int16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_UINT16) {
            //nd4j::TypeCast::convertGeneric<double, uint16_t>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT24) {
            // not supported
        } else if (dstType == ND4J_FLOAT32) {
            nd4j::TypeCast::convertGeneric<double, float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            // same type, nothing to do
        } else if (dstType == ND4J_THRESHOLD) {
            nd4j::TypeCast::convertToThreshold<double>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else if (srcType == ND4J_THRESHOLD) {
        if (dstType == ND4J_FLOAT16) {
            nd4j::TypeCast::convertFromThreshold<float16>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_FLOAT32) {
            nd4j::TypeCast::convertFromThreshold<float>(nullptr, hx, N, hz);
        } else if (dstType == ND4J_DOUBLE) {
            nd4j::TypeCast::convertFromThreshold<double>(nullptr, hx, N, hz);
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
    } else {
        nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
    }
}
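
/*
 * Example (a sketch; buffer names are illustrative): converting N floats to
 * half precision on the host.
 *
 *   std::vector<float> src(N);
 *   std::vector<float16> dst(N);
 *   nativeOps.convertTypes(nullptr, ND4J_FLOAT32, (Nd4jPointer) src.data(), N,
 *                          ND4J_FLOAT16, (Nd4jPointer) dst.data());
 */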
/*
void NativeOps::fillUtf8String(Nd4jPointer *extraPointers, const char **strings, int numStrings, Nd4jPointer buffer) {
    auto hZ = reinterpret_cast<nd4j::utf8string **>(buffer);
    for (int e = 0; e < numStrings; e++) {
        hZ[e] = reinterpret_cast<nd4j::utf8string *>(createUtf8String(extraPointers, strings[e]));
    }
}
*/
Nd4jPointer NativeOps::createUtf8String(Nd4jPointer *extraPointers, const char *string, int length) {
    auto u = new nd4j::utf8string(string, length);
    return reinterpret_cast<Nd4jPointer>(u);
}

void NativeOps::deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) {
    delete (reinterpret_cast<nd4j::utf8string *>(ptr));
}
////////////////////////////////////////////////////////////////////////
void NativeOps::scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs,
                              void *hX, Nd4jLong *hXShapeInfo, Nd4jLong *hXOffsets,
                              void *dX, Nd4jLong *dXShapeInfo, Nd4jLong *dXOffsets,
                              void *hY, Nd4jLong *hYShapeInfo, Nd4jLong *hYOffsets,
                              void *dY, Nd4jLong *dYShapeInfo, Nd4jLong *dYOffsets,
                              int *hIindexes, int *dIindexes) {
    int numThreads = omp_get_max_threads();

    PRAGMA_OMP_PARALLEL_THREADS(numThreads)
    {
        for (int i = 0; i < numOfSubArrs; ++i) {
            int threadIndex = omp_get_thread_num();
            const auto xIndex = hIindexes[i];
            // each target index is "owned" by exactly one thread, so concurrent
            // updates of the same sub-array never race
            const bool isOwner = xIndex < numThreads ? threadIndex == xIndex : threadIndex == xIndex % numThreads;

            if (!isOwner)
                continue;

            NDArray inSubArr(reinterpret_cast<int8_t *>(hX) + (hXOffsets[hIindexes[i]] * DataTypeUtils::sizeOf(hXShapeInfo)), hXShapeInfo);
            NDArray updSubArr(reinterpret_cast<int8_t *>(hY) + (hYOffsets[i] * DataTypeUtils::sizeOf(hYShapeInfo)), hYShapeInfo);

            if (inSubArr.lengthOf() != updSubArr.lengthOf()) {
                continue;
            }

            switch (opCode) {
                case 0:
                    inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr);
                    break;
                case 1:
                    inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr);
                    break;
                case 2:
                    inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr);
                    break;
                case 3:
                    inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr);
                    break;
                case 4:
                    inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr);
                    break;
                case 5:
                    inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr);
                    break;
                case 6:
                    inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr);
                    break;
                default:
                    continue;
            }
        }
    }
}
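
/*
 * scatterUpdate opCode legend (as handled above):
 *   0 - add, 1 - subtract, 2 - multiply, 3 - divide,
 *   4 - reverse subtract, 5 - reverse divide, 6 - copy
 * Sub-arrays whose length doesn't match the corresponding update are skipped.
 */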
void NativeOps::inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) {
    auto p = reinterpret_cast<nd4j::DebugInfo *>(debugInfo);
    NDArray array(buffer, shapeInfo);
    nd4j::DebugHelper::retrieveDebugStatistics(p, &array);
}
void NativeOps::tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) {
    // touch every byte of the buffer: an invalid pointer crashes here
    // instead of somewhere deeper in the library
    auto buf = reinterpret_cast<int8_t *>(p);
    int cnt = 0;
    for (int i = 0; i < len; i++)
        cnt += buf[i];
}
nd4j::ConstantDataBuffer* NativeOps::shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) {
    auto buffer = new ConstantDataBuffer();
    *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty));
    return buffer;
}
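
// Note: on this CPU backend only the ConstantDescriptor overload below is
// implemented; the Nd4jLong* and double* overloads currently return nullptr.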
nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length) {
    return nullptr;
}

nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, double *data, int length) {
    return nullptr;
}

nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) {
    return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype);
}

int NativeOps::dataTypeFromNpyHeader(void *header) {
    return (int) cnpy::dataTypeFromHeader(reinterpret_cast<char *>(header));
}
Nd4jPointer NativeOps::shapeBufferForNumpy(Nd4jPointer npyArray) {
    cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast<char *>(npyArray));
    unsigned int shapeSize = arr.shape.size();
    std::vector<Nd4jLong> shape(shapeSize);
    bool _empty = false;
    for (unsigned int i = 0; i < shapeSize; i++) {
        shape[i] = arr.shape[i];

        if (arr.shape[i] == 0)
            _empty = true;
    }

    auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast<char *>(npyArray));

    Nd4jLong *shapeBuffer;
    if (_empty) {
        if (shapeSize > 0)
            shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape);
        else
            shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype);
    } else {
        shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape);
    }
    return reinterpret_cast<Nd4jPointer>(shapeBuffer);
}
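
// Note: any zero-length dimension marks the numpy array as empty, in which case
// an empty shapeInfo is built so downstream ops can short-circuit on it.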
void NativeOps::sortByKey(Nd4jPointer *extraPointers,
                          void *x, Nd4jLong *xShapeInfo,
                          void *dx, Nd4jLong *dxShapeInfo,
                          void *y, Nd4jLong *yShapeInfo,
                          void *dy, Nd4jLong *dyShapeInfo,
                          bool descending) {
    auto xType = ArrayOptions::dataType(xShapeInfo);
    auto yType = ArrayOptions::dataType(yShapeInfo);

    BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByKey(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES);
}
void NativeOps::sortByValue(Nd4jPointer *extraPointers,
                            void *x, Nd4jLong *xShapeInfo,
                            void *dx, Nd4jLong *dxShapeInfo,
                            void *y, Nd4jLong *yShapeInfo,
                            void *dy, Nd4jLong *dyShapeInfo,
                            bool descending) {
    auto xType = ArrayOptions::dataType(xShapeInfo);
    auto yType = ArrayOptions::dataType(yShapeInfo);

    BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByValue(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES);
}
void NativeOps::sortTadByKey(Nd4jPointer *extraPointers,
                             void *x, Nd4jLong *xShapeInfo,
                             void *dx, Nd4jLong *dxShapeInfo,
                             void *y, Nd4jLong *yShapeInfo,
                             void *dy, Nd4jLong *dyShapeInfo,
                             int *dimension,
                             int dimensionLength,
                             bool descending) {
    auto xType = ArrayOptions::dataType(xShapeInfo);
    auto yType = ArrayOptions::dataType(yShapeInfo);

    BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByKey(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES);
}
void NativeOps::sortTadByValue(Nd4jPointer *extraPointers,
                               void *x, Nd4jLong *xShapeInfo,
                               void *dx, Nd4jLong *dxShapeInfo,
                               void *y, Nd4jLong *yShapeInfo,
                               void *dy, Nd4jLong *dyShapeInfo,
                               int *dimension,
                               int dimensionLength,
                               bool descending) {
    auto xType = ArrayOptions::dataType(xShapeInfo);
    auto yType = ArrayOptions::dataType(yShapeInfo);

    BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES);
}
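
// Note: on this CPU backend the device-side arguments (dx/dxShapeInfo, dy/dyShapeInfo)
// of the sort functions above are unused; sorting operates on the host buffers only.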
BUILD_SINGLE_TEMPLATE(template void flattenGeneric, (Nd4jPointer*, int, char, void*, Nd4jLong*, void*, Nd4jLong*), LIBND4J_TYPES);
BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void*, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES);
BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void*, Nd4jLong*, Nd4jPointer*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES);
BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong**, void**, Nd4jLong**, int, int*, Nd4jLong**, Nd4jLong**), LIBND4J_TYPES);