/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/
//
// @author raver119@gmail.com
//
#include <ops/declarable/CustomOperations.h>
#include <performance/benchmarking/FullBenchmarkSuit.h>
#include <ops/declarable/LegacyRandomOp.h>
#include <algorithm>

#ifdef RELEASE_BUILD
int wIterations = 4;
int rIterations = 20;
int gemmRegularUpperPow = 11;
int scalarBenchmarkPowLimit = 26;
int transformBenchmarkPowLimit = 26;
int intermediateTransformPowLimit = 22;
int intermediateTransformPowLimit2 = 18;
int pairwisePowLimit = 26;
int heavyPowLimit = 22;
int nonEwsPowLimit = 10;
int reduceScalarPowLimit = 26;
int stridedReductionPowLimit = 20;
int mismatchedAssignPowLimit = 26;
int gatherOpPowLimit = 18;
int gatherOpPowLimit2 = 16;
int gatherOpPowLimit3 = 12;
int broadcastMatrixRankLimit = 5;
int limit30 = 30;
int limit26 = 26;
int limit24 = 24;
int limit22 = 22;
int limit20 = 20;
int limit18 = 18;
int limit10 = 10;
int limit5 = 5;
int limit3 = 3;
#else
int wIterations = 0;
int rIterations = 1;
int gemmRegularUpperPow = 7;
int scalarBenchmarkPowLimit = 10;
int transformBenchmarkPowLimit = 10;
int intermediateTransformPowLimit = 10;
int intermediateTransformPowLimit2 = 10;
int pairwisePowLimit = 10;
int heavyPowLimit = 10;
int nonEwsPowLimit = 6;
int reduceScalarPowLimit = 10;
int stridedReductionPowLimit = 12;
int mismatchedAssignPowLimit = 2;
int gatherOpPowLimit = 10;
int gatherOpPowLimit2 = 8;
int gatherOpPowLimit3 = 8;
int broadcastMatrixRankLimit = 3;
int limit26 = 8;
int limit24 = 8;
int limit22 = 8;
int limit20 = 8;
int limit18 = 8;
int limit10 = 4;
int limit5 = 3;
int limit3 = 1;
#endif
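// The limits above control the problem sizes for each benchmark family: the
// first branch exercises realistic (large) shapes for full benchmark runs,
// while the fallback values keep every suite small enough for a quick sanity run.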
namespace sd {
static std::string layerNormBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    BoolParameters nhwc("nhwc");    //0 = nchw
#ifdef _RELEASE
    int c = 32;
    int hw = 64;
#else
    int c = 3;
    int hw = 8;
#endif

    ParametersBatch batch({&nhwc});
    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int n = p.getIntParam("nhwc");
        int axis;
        if (n == 0) {
            //nchw
            auto input = NDArrayFactory::create_<float>('c', {16, c, hw, hw});
            auto output = NDArrayFactory::create_<float>('c', {16, c, hw, hw});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
            axis = 1;
        } else {
            auto input = NDArrayFactory::create_<float>('c', {32, hw, hw, c});
            auto output = NDArrayFactory::create_<float>('c', {32, hw, hw, c});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
            axis = 3;
        }

        auto bias = NDArrayFactory::create_<float>('c', {c});
        ctx->setInputArray(1, bias, true);

        auto iargs = new Nd4jLong[1];
        iargs[0] = axis;
        ctx->setIArguments(iargs, 1);
        delete[] iargs;
        return ctx;
    };

    sd::ops::layer_norm layerNorm;
    DeclarableBenchmark benchmark(layerNorm, "layer norm");
    output += helper.runOperationSuit(&benchmark, generator, batch, "Layer Norm");
    return output;
}
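// Benchmarks maxpool3dnew with a 3x3x3 kernel, stride 1 and SAME padding in
// both NDHWC and NCDHW layouts, so the output has the same shape as the input.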
static std::string maxPool3DBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    BoolParameters ncdhw("ncdhw");      //1 = ndhwc
    ParametersBatch batch({&ncdhw});
    sd::ops::maxpool3dnew maxpool3Dnew;
    DeclarableBenchmark benchmark(maxpool3Dnew, "maxPool3d");
#ifdef _RELEASE
    int mb = 16;
    int chIn = 16;
    int chOut = 16;
    int dhw = 64;
#else
    int mb = 1;
    int chIn = 3;
    int chOut = 3;
    int dhw = 16;
#endif

    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int format = p.getIntParam("ncdhw");
        //Set inputs and outputs
        //Same mode + stride 1: output is same shape as input
        if (format == 1) {
            //NDHWC
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {mb, dhw, dhw, dhw, chIn}), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {mb, dhw, dhw, dhw, chIn}), true);
        } else {
            //NCDHW
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {mb, chIn, dhw, dhw, dhw}), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {mb, chIn, dhw, dhw, dhw}), true);
        }

        auto iargs = new Nd4jLong[15];
        //Kernel, strides, padding, dilation - x3 each
        iargs[0] = 3;       //Kernel
        iargs[1] = 3;
        iargs[2] = 3;
        iargs[3] = 1;       //Stride
        iargs[4] = 1;
        iargs[5] = 1;
        iargs[6] = 0;       //Padding
        iargs[7] = 0;
        iargs[8] = 0;
        iargs[9] = 1;       //Dilation
        iargs[10] = 1;
        iargs[11] = 1;
        iargs[12] = 1;      //Same mode
        iargs[13] = 0;      //Unused for max
        iargs[14] = format; //0 = ncdhw
        ctx->setIArguments(iargs, 15);
        delete[] iargs;
        return ctx;
    };

    output += helper.runOperationSuit(&benchmark, generator, batch, "maxPool3d");
    return output;
}
static std::string conv3dBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    BoolParameters ncdhw("ncdhw");      //1 = ndhwc
    ParametersBatch batch({&ncdhw});
    sd::ops::conv3dnew conv3Dnew;
    DeclarableBenchmark benchmark(conv3Dnew, "conv3d");
#ifdef _RELEASE
    int mb = 16;
    int chIn = 16;
    int chOut = 16;
    int dhw = 64;
#else
    int mb = 1;
    int chIn = 3;
    int chOut = 3;
    int dhw = 16;
#endif

    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int format = p.getIntParam("ncdhw");
        //Set inputs and outputs
        //Same mode + stride 1: output is same shape as input
        if (format == 1) {
            //NDHWC
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {mb, dhw, dhw, dhw, chIn}), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {mb, dhw, dhw, dhw, chIn}), true);
        } else {
            //NCDHW
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {mb, chIn, dhw, dhw, dhw}), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {mb, chIn, dhw, dhw, dhw}), true);
        }

        //Weights and bias:
        ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {3, 3, 3, chIn, chOut}), true);
        ctx->setInputArray(2, NDArrayFactory::create_<float>('c', {chOut}), true);

        auto iargs = new Nd4jLong[14];
        //Kernel, strides, padding, dilation - x3 each
        iargs[0] = 3;       //Kernel
        iargs[1] = 3;
        iargs[2] = 3;
        iargs[3] = 1;       //Stride
        iargs[4] = 1;
        iargs[5] = 1;
        iargs[6] = 0;       //Padding
        iargs[7] = 0;
        iargs[8] = 0;
        iargs[9] = 1;       //Dilation
        iargs[10] = 1;
        iargs[11] = 1;
        iargs[12] = 1;      //Same mode
        iargs[13] = format; //0 = ncdhw
        ctx->setIArguments(iargs, 14);
        delete[] iargs;
        return ctx;
    };

    output += helper.runOperationSuit(&benchmark, generator, batch, "CNN3D");
    return output;
}
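// Benchmarks lstmBlock over TNS ([seqLen, mb, size]) and NST ([mb, size, seqLen])
// layouts; all gate/state outputs (i, c, f, o, z, h, y) are allocated per run.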
static std::string lstmBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    BoolParameters format("format");    //0=TNS=[seqLen,mb,size]; 1=NST=[mb,size,seqLen]
#ifdef _RELEASE
    PredefinedParameters mb("mb", {1, 8, 64});
    PredefinedParameters nInOut("nInOut", {32, 256, 1024});
#else
    PredefinedParameters mb("mb", {1});
    PredefinedParameters nInOut("nInOut", {32});
#endif

    ParametersBatch batch({&format, &mb, &nInOut});
    sd::ops::lstmBlock lstmBlock;
    DeclarableBenchmark benchmark(lstmBlock, "lstm");
    int seqLength = 32;

    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int f = p.getIntParam("format");
        int m = p.getIntParam("mb");
        int n = p.getIntParam("nInOut");

        Nd4jLong l = 0;
        ctx->setInputArray(0, NDArrayFactory::create_<Nd4jLong>(l), true);  //Max TS length (unused)

        if (f == 0) {
            //TNS format
            ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);    //x
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //i
            ctx->setOutputArray(1, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //c
            ctx->setOutputArray(2, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //f
            ctx->setOutputArray(3, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //o
            ctx->setOutputArray(4, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //z
            ctx->setOutputArray(5, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //h
            ctx->setOutputArray(6, NDArrayFactory::create_<float>('c', {seqLength, m, n}), true);   //y
        } else {
            //NST format
            ctx->setInputArray(1, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);    //x
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //i
            ctx->setOutputArray(1, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //c
            ctx->setOutputArray(2, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //f
            ctx->setOutputArray(3, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //o
            ctx->setOutputArray(4, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //z
            ctx->setOutputArray(5, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //h
            ctx->setOutputArray(6, NDArrayFactory::create_<float>('f', {m, n, seqLength}), true);   //y
        }

        auto cLast = NDArrayFactory::create_<float>('c', {m, n});
        auto yLast = NDArrayFactory::create_<float>('c', {m, n});
        auto W = NDArrayFactory::create_<float>('c', {2 * n, 4 * n});
        auto Wci = NDArrayFactory::create_<float>('c', {n});
        auto Wcf = NDArrayFactory::create_<float>('c', {n});
        auto Wco = NDArrayFactory::create_<float>('c', {n});
        auto b = NDArrayFactory::create_<float>('c', {4 * n});

        ctx->setInputArray(2, cLast, true);
        ctx->setInputArray(3, yLast, true);
        ctx->setInputArray(4, W, true);
        ctx->setInputArray(5, Wci, true);
        ctx->setInputArray(6, Wcf, true);
        ctx->setInputArray(7, Wco, true);
        ctx->setInputArray(8, b, true);

        auto iargs = new Nd4jLong[2];
        iargs[0] = 0;   //No peephole
        iargs[1] = f;
        ctx->setIArguments(iargs, 2);
        delete[] iargs;

        auto targs = new double[2];
        targs[0] = 1.0; //forget bias
        targs[1] = 0.0; //cell clipping value
        ctx->setTArguments(targs, 2);
        delete[] targs;
        return ctx;
    };

    output += helper.runOperationSuit(&benchmark, generator, batch, "LSTMBlock");
    return output;
}
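// Benchmarks batchnorm with scale and offset applied, varying layout (NCHW/NHWC),
// channel count and spatial size; variance, gamma and beta are filled with 1.0.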
static std::string batchnormBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Batch normalization op
    BoolParameters nhwc("nhwc");
#ifdef _RELEASE
    PredefinedParameters c("c", {3, 32, 128});
    PredefinedParameters hw("hw", {32, 128});
#else
    PredefinedParameters c("c", {3});
    PredefinedParameters hw("hw", {16});
#endif

    ParametersBatch batch({&nhwc, &c, &hw});

    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int n = p.getIntParam("nhwc");
        int hw = p.getIntParam("hw");
        int ch = p.getIntParam("c");

        auto args = new Nd4jLong[3];
        args[0] = args[1] = 1;  //apply scale and offset
        if (n == 0) {
            auto input = NDArrayFactory::create_<float>('c', {32, ch, hw, hw});
            auto output = NDArrayFactory::create_<float>('c', {32, ch, hw, hw});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
            args[2] = 1;    //axis
        } else {
            auto input = NDArrayFactory::create_<float>('c', {32, hw, hw, ch});
            auto output = NDArrayFactory::create_<float>('c', {32, hw, hw, ch});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
            args[2] = 3;    //axis
        }
        ctx->setIArguments(args, 3);
        delete[] args;

        ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {ch}), true);     //mean
        auto v = NDArrayFactory::create_<float>('c', {ch});
        v->assign(1.0f);
        ctx->setInputArray(2, v, true);     //variance
        auto g = NDArrayFactory::create_<float>('c', {ch});
        g->assign(1.0);
        ctx->setInputArray(3, g, true);     //gamma
        auto b = NDArrayFactory::create_<float>('c', {ch});
        b->assign(1.0);
        ctx->setInputArray(4, b, true);     //beta

        auto targs = new double[1];
        targs[0] = 1e-5;
        ctx->setTArguments(targs, 1);
        delete[] targs;
        return ctx;
    };

    sd::ops::batchnorm batchnorm;
    DeclarableBenchmark benchmark(batchnorm, "batchnorm");
    output += helper.runOperationSuit(&benchmark, generator, batch, "Batch Normalization");
    return output;
}
static std::string pool2dBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Pooling2D ops
    BoolParameters nhwc("nhwc");
#ifdef _RELEASE
    PredefinedParameters k("k", {2, 3, 5});
    PredefinedParameters c("c", {3, 32, 128});
    PredefinedParameters hw("hw", {32, 128});
#else
    PredefinedParameters k("k", {2});
    PredefinedParameters c("c", {3});
    PredefinedParameters hw("hw", {8});
#endif

    ParametersBatch batch({&nhwc, &k, &c, &hw});

    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int n = p.getIntParam("nhwc");
        int hw = p.getIntParam("hw");
        int khw = p.getIntParam("k");

        if (n == 0) {
            auto input = NDArrayFactory::create_<float>('c', {32, p.getIntParam("c"), hw, hw});
            auto output = NDArrayFactory::create_<float>('c', {32, p.getIntParam("c"), hw, hw});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
        } else {
            auto input = NDArrayFactory::create_<float>('c', {32, hw, hw, p.getIntParam("c")});
            auto output = NDArrayFactory::create_<float>('c', {32, hw, hw, p.getIntParam("c")});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
        }

        auto args = new Nd4jLong[11];
        args[0] = args[1] = khw;    //Kernel
        args[2] = args[3] = 1;      //Stride
        args[4] = args[5] = 0;      //Pad
        args[6] = args[7] = 1;      //Dilation
        args[8] = 1;                //SAME
        args[9] = 0;                //Divisor mode - 0 = exclude padding in divisor
        args[10] = n;               //0-nchw, 1=nhwc
        ctx->setIArguments(args, 11);
        delete[] args;
        return ctx;
    };

    sd::ops::avgpool2d avgpool2d;
    DeclarableBenchmark benchmark1(avgpool2d, "avgpool");
    output += helper.runOperationSuit(&benchmark1, generator, batch, "Average Pooling 2d Operation");

    sd::ops::maxpool2d maxpool2d;
    DeclarableBenchmark benchmark2(maxpool2d, "maxpool");
    output += helper.runOperationSuit(&benchmark2, generator, batch, "Max Pooling 2d Operation");
    return output;
}
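// Benchmarks conv2d with square kernels, stride 1, SAME padding and equal
// input/output channel counts, in both NCHW and NHWC layouts.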
static std::string conv2dBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Convolution2D op
    BoolParameters nhwc("nhwc");
#ifdef _RELEASE
    PredefinedParameters k("k", {2, 3, 5});
    PredefinedParameters c("c", {3, 32, 128});
    PredefinedParameters hw("hw", {32, 128});
#else
    PredefinedParameters k("k", {2});
    PredefinedParameters c("c", {3});
    PredefinedParameters hw("hw", {8});
#endif

    ParametersBatch batch({&nhwc, &k, &c, &hw});
    sd::ops::conv2d conv2d;
    DeclarableBenchmark benchmark(conv2d, "conv2d");

    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int n = p.getIntParam("nhwc");
        int hw = p.getIntParam("hw");
        int khw = p.getIntParam("k");

        if (n == 0) {
            auto input = NDArrayFactory::create_<float>('c', {32, p.getIntParam("c"), hw, hw});
            auto output = NDArrayFactory::create_<float>('c', {32, p.getIntParam("c"), hw, hw});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
        } else {
            auto input = NDArrayFactory::create_<float>('c', {32, hw, hw, p.getIntParam("c")});
            auto output = NDArrayFactory::create_<float>('c', {32, hw, hw, p.getIntParam("c")});
            ctx->setInputArray(0, input, true);
            ctx->setOutputArray(0, output, true);
        }

        auto b = NDArrayFactory::create_<float>('c', {p.getIntParam("c")});
        auto w = NDArrayFactory::create_<float>('c', {khw, khw, p.getIntParam("c"), p.getIntParam("c")});   // [kH, kW, iC, oC] always
        ctx->setInputArray(1, w, true);
        ctx->setInputArray(2, b, true);

        auto args = new Nd4jLong[10];
        args[0] = args[1] = khw;    //Kernel
        args[2] = args[3] = 1;      //Stride
        args[4] = args[5] = 0;      //Pad
        args[6] = args[7] = 1;      //Dilation
        args[8] = 1;                //SAME
        args[9] = n;                //0-nchw, 1=nhwc
        ctx->setIArguments(args, 10);
        delete[] args;
        return ctx;
    };

    output += helper.runOperationSuit(&benchmark, generator, batch, "Conv2d Operation");
    return output;
}
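// Benchmarks the legacy RNG ops (uniform, gaussian, truncated normal, log normal,
// bernoulli) filling a 1 x length output from a shape array input.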
static std::string rngBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Uniform, gaussian and bernoulli RNG generation
    IntPowerParameters length("length", 2, 4, scalarBenchmarkPowLimit, 3);      //2^4 to 2^26 in steps of 3
    ParametersBatch batch({&length});

    auto gen01 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        ctx->setInputArray(0, NDArrayFactory::create_<Nd4jLong>('c', {2}, {1, p.getIntParam("length")}), true);     //Shape as NDArray
        ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {1, p.getIntParam("length")}), true);
        auto d = new double[2];
        d[0] = 0.0;
        d[1] = 1.0;
        ctx->setTArguments(d, 2);
        delete[] d;
        return ctx;
    };

    auto gen05 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        ctx->setInputArray(0, NDArrayFactory::create_<Nd4jLong>('c', {2}, {1, p.getIntParam("length")}), true);     //Shape as NDArray
        ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {1, p.getIntParam("length")}), true);
        auto d = new double[1];
        d[0] = 0.5;
        ctx->setTArguments(d, 1);
        delete[] d;
        return ctx;
    };

    sd::ops::LegacyRandomOp unif(random::UniformDistribution);
    DeclarableBenchmark dbU(unif, "uniform");
    output += helper.runOperationSuit(&dbU, gen01, batch, "Uniform Distribution");

    sd::ops::LegacyRandomOp gaussian(random::GaussianDistribution);
    DeclarableBenchmark dbG(gaussian, "gaussian");
    output += helper.runOperationSuit(&dbG, gen01, batch, "Gaussian Distribution");

    sd::ops::LegacyRandomOp trunc(random::TruncatedNormalDistribution);
    DeclarableBenchmark dbTU(trunc, "trunc.norm");
    output += helper.runOperationSuit(&dbTU, gen01, batch, "Truncated Normal Distribution");

    sd::ops::LegacyRandomOp ln(random::LogNormalDistribution);
    DeclarableBenchmark dbLN(ln, "lognormal");
    output += helper.runOperationSuit(&dbLN, gen01, batch, "Log Normal Distribution");

    sd::ops::LegacyRandomOp bernoulli(random::BernoulliDistribution);
    DeclarableBenchmark dbB(bernoulli, "bernoulli");
    output += helper.runOperationSuit(&dbB, gen05, batch, "Bernoulli Distribution");

    sd::ops::LegacyRandomOp dropout(random::BernoulliDistribution);
    DeclarableBenchmark dbD(dropout, "dropout");
    output += helper.runOperationSuit(&dbD, gen05, batch, "Dropout");
    return output;
}
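// GEMM with "irregular" dimensions (1020..1028, i.e. not multiples of 8),
// varying A.rows, A.columns/B.rows and B.columns in turn for each tA/tB combination.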
static std::string gemmIrregularBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Basically the same as above, but with irregular shapes (not multiples of 8, etc)
#ifdef _RELEASE
    int tAMax = 1;
    int tBMax = 1;
    int b = 1024;
    int c = 1024;
#else
    int tAMax = 1;
    int tBMax = 1;
    int b = 32;
    int c = 32;
#endif

    for (int tA = 0; tA <= tAMax; tA++) {
        for (int tB = 0; tB <= tBMax; tB++) {
            IntParameters d("d", 1020, 1028, 1);    //1020, 1021, ..., 1028
            ParametersBatch dim({&d});

            //Vary A.rows:
            auto generator = PARAMETRIC_XYZ() {
                auto a = p.getIntParam("d");
                std::vector<Nd4jLong> shapeA;
                std::vector<Nd4jLong> shapeB;
                if (tA) {
                    shapeA = {b, a};
                } else {
                    shapeA = {a, b};
                }
                if (tB) {
                    shapeB = {c, b};
                } else {
                    shapeB = {b, c};
                }
                auto A = NDArrayFactory::create_<float>('c', shapeA);
                auto B = NDArrayFactory::create_<float>('c', shapeB);
                auto C = NDArrayFactory::create_<float>('f', {a, c});
                x.push_back(A);
                y.push_back(B);
                z.push_back(C);
            };

            std::string n;
            n += "Gemm (a.rows) - tA=";
            n += std::to_string(tA);
            n += ", tB=";
            n += std::to_string(tB);

            MatrixBenchmark mb(1.0, 0.0, tA, tB, n);
            output += helper.runOperationSuit(&mb, generator, dim, n.c_str());

            //Vary A.columns / B.rows
            auto generator2 = PARAMETRIC_XYZ() {
                auto a = 1024;
                auto b = p.getIntParam("d");
                auto c = 1024;
                std::vector<Nd4jLong> shapeA;
                std::vector<Nd4jLong> shapeB;
                if (tA) {
                    shapeA = {b, a};
                } else {
                    shapeA = {a, b};
                }
                if (tB) {
                    shapeB = {c, b};
                } else {
                    shapeB = {b, c};
                }
                auto A = NDArrayFactory::create_<float>('c', shapeA);
                auto B = NDArrayFactory::create_<float>('c', shapeB);
                auto C = NDArrayFactory::create_<float>('f', {a, c});
                x.push_back(A);
                y.push_back(B);
                z.push_back(C);
            };

            std::string n2;
            n2 += "Gemm (a.columns) - tA=";
            n2 += std::to_string(tA);
            n2 += ", tB=";
            n2 += std::to_string(tB);

            MatrixBenchmark mb2(1.0, 0.0, tA, tB, n2);
            output += helper.runOperationSuit(&mb2, generator2, dim, n2.c_str());

            //Vary B.columns
            auto generator3 = PARAMETRIC_XYZ() {
                auto a = 1024;
                auto b = 1024;
                auto c = p.getIntParam("d");
                std::vector<Nd4jLong> shapeA;
                std::vector<Nd4jLong> shapeB;
                if (tA) {
                    shapeA = {b, a};
                } else {
                    shapeA = {a, b};
                }
                if (tB) {
                    shapeB = {c, b};
                } else {
                    shapeB = {b, c};
                }
                auto A = NDArrayFactory::create_<float>('c', shapeA);
                auto B = NDArrayFactory::create_<float>('c', shapeB);
                auto C = NDArrayFactory::create_<float>('f', {a, c});
                x.push_back(A);
                y.push_back(B);
                z.push_back(C);
            };

            std::string n3;
            n3 += "Gemm (b.columns) - tA=";
            n3 += std::to_string(tA);
            n3 += ", tB=";
            n3 += std::to_string(tB);

            MatrixBenchmark mb3(1.0, 0.0, tA, tB, n3);
            output += helper.runOperationSuit(&mb3, generator3, dim, n3.c_str());
        }
    }
    return output;
}
static std::string batchGemmBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Rank 3 - [32,1024,1024]x[32,1024,1024]
    //Rank 4 - [4,8,1024,1024]x[4,8,1024,1024]
    IntParameters rank("rank", 3, 4, 1);
    ParametersBatch b({&rank});

    auto generator = PARAMETRIC_D() {
        auto rank = p.getIntParam("rank");
        std::vector<Nd4jLong> shapeA;
        std::vector<Nd4jLong> shapeB;
        auto ctx = new Context(1);
        if (rank == 3) {
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {32, 1024, 1024}), true);
            ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {32, 1024, 1024}), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {32, 1024, 1024}), true);
        } else {
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {4, 8, 1024, 1024}), true);
            ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {4, 8, 1024, 1024}), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {4, 8, 1024, 1024}), true);
        }
        return ctx;
    };

    sd::ops::matmul mmul;
    DeclarableBenchmark benchmark(mmul, "mmul (batch)");
    output += helper.runOperationSuit(&benchmark, generator, b, "MMul (batch)");
    return output;
}
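// Square GEMM over power-of-two sizes, covering all combinations of transposed
// inputs and c/f ordering of the result matrix.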
static std::string gemmRegularBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    for (int o = 0; o <= 1; o++) {
        char resultOrder = (o == 0 ? 'f' : 'c');
        for (int tA = 0; tA <= 1; tA++) {
            for (int tB = 0; tB <= 1; tB++) {
                IntPowerParameters pa("sz", 2, 7, gemmRegularUpperPow, 2);      //2^7=128, 2^9=512, 2^11=2048
                ParametersBatch b({&pa});

                auto generator = PARAMETRIC_XYZ() {
                    auto s = p.getIntParam("sz");
                    auto A = NDArrayFactory::create_<float>('c', {s, s});
                    auto B = NDArrayFactory::create_<float>('c', {s, s});
                    auto C = NDArrayFactory::create_<float>(resultOrder, {s, s});
                    x.push_back(A);
                    y.push_back(B);
                    z.push_back(C);
                };

                std::string n;
                n += "Gemm - tA=";
                n += std::to_string(tA);
                n += ", tB=";
                n += std::to_string(tB);
                n += ", cOrder=";
                n += resultOrder;

                MatrixBenchmark mb(1.0, 0.0, tA == 0 ? false : true, tB == 0 ? false : true, n);
                output += helper.runOperationSuit(&mb, generator, b, n.c_str());
            }
        }
    }
    return output;
}
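// scatter_upd benchmarks on 1d, 2d and 3d inputs; indices are a shuffled
// permutation of [0, n) and the op runs in place on the input array.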
static std::string scatterOpBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters length("length", 2, 10, gatherOpPowLimit, 4);    //2^10 to 2^18 in steps of 4
    ParametersBatch batch({&length});

    //Scatter update 1D tests - 1d ref, 1d indices, 1d updates -> 1d output
    sd::ops::scatter_upd scatter_update1;
    DeclarableBenchmark sa1d(scatter_update1, "scatter_update1d");
    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int length = p.getIntParam("length");
        auto in = NDArrayFactory::create_<float>('c', {length});
        auto indices = NDArrayFactory::create_<int>('c', {length});
        auto updates = NDArrayFactory::create_<float>('c', {length});

        int *a = new int[length];
        for (int i = 0; i < length; i++) {
            a[i] = i;
        }
        srand(12345);
        std::random_shuffle(a, (a + length - 1));
        for (int i = 0; i < length; i++) {
            indices->p(i, a[i]);
        }
        delete[] a;

        ctx->setInputArray(0, in, true);
        ctx->setInputArray(1, indices, true);
        ctx->setInputArray(2, updates, true);
        ctx->setOutputArray(0, in);     //Needs to be inplace to avoid copy!
        ctx->markInplace(true);
        return ctx;
    };
    output += helper.runOperationSuit(&sa1d, generator, batch, "Scatter Update - 1d");

    //Scatter update 2D tests - 2d input, 1d indices, 2d updates -> 2d output
    IntPowerParameters rows("rows", 2, 8, gatherOpPowLimit2, 4);    //2^8 to 2^16 in steps of 4
    PredefinedParameters cols("cols", {32});
    ParametersBatch batch2({&rows, &cols});
    sd::ops::scatter_upd scatter_update2;
    DeclarableBenchmark sa2d(scatter_update2, "scatter_update2d");
    auto generator2 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int rows = p.getIntParam("rows");
        int cols = p.getIntParam("cols");
        auto in = NDArrayFactory::create_<float>('c', {rows, cols});
        auto indices = NDArrayFactory::create_<int>('c', {rows});
        auto updates = NDArrayFactory::create_<float>('c', {rows, cols});

        int *a = new int[rows];
        for (int i = 0; i < rows; i++) {
            a[i] = i;
        }
        srand(12345);
        std::random_shuffle(a, (a + rows - 1));
        for (int i = 0; i < rows; i++) {
            indices->p(i, a[i]);
        }
        delete[] a;

        ctx->setInputArray(0, in, true);
        ctx->setInputArray(1, indices, true);
        ctx->setInputArray(2, updates, true);
        ctx->setOutputArray(0, in);     //Needs to be inplace to avoid copy!
        ctx->markInplace(true);
        return ctx;
    };
    output += helper.runOperationSuit(&sa2d, generator2, batch2, "Scatter Update - 2d");

    //Scatter update 3D tests - 3d input, 1d indices, 3d updates -> 3d output
    IntPowerParameters sz0("sz0", 2, 8, gatherOpPowLimit3, 4);
    PredefinedParameters sz1("sz1", {32});
    ParametersBatch batch3({&sz0, &sz1});
    sd::ops::scatter_upd scatter_update3;
    DeclarableBenchmark sa3d(scatter_update3, "scatter3d");
    auto generator3 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int sz0 = p.getIntParam("sz0");
        int sz1 = p.getIntParam("sz1");
        auto in = NDArrayFactory::create_<float>('c', {sz0, sz1, 512 / sz1});
        auto indices = NDArrayFactory::create_<int>('c', {sz0});
        auto updates = NDArrayFactory::create_<float>('c', {sz0, sz1, 512 / sz1});

        int *a = new int[sz0];
        for (int i = 0; i < sz0; i++) {
            a[i] = i;
        }
        srand(12345);
        std::random_shuffle(a, (a + sz0 - 1));
        for (int i = 0; i < sz0; i++) {
            indices->p(i, a[i]);
        }
        delete[] a;

        ctx->setInputArray(0, in, true);
        ctx->setInputArray(1, indices, true);
        ctx->setInputArray(2, updates, true);
        ctx->setOutputArray(0, in);     //Needs to be inplace to avoid copy!
        ctx->markInplace(true);
        return ctx;
    };
    output += helper.runOperationSuit(&sa3d, generator3, batch3, "Scatter Update - 3d");
    return output;
}
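// gather benchmarks on 1d, 2d and 3d inputs with shuffled permutation indices,
// writing to a freshly allocated output of the same shape.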
static std::string gatherOpBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters length("length", 2, 10, gatherOpPowLimit, 4);    //2^10 to 2^18 in steps of 4
    ParametersBatch batch({&length});

    //Gather 1D tests - 1d input, 1d indices -> 1d output
    sd::ops::gather gather1;
    DeclarableBenchmark gather1d(gather1, "gather1d");
    auto generator = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int length = p.getIntParam("length");
        auto in = NDArrayFactory::create_<float>('c', {length});
        auto indices = NDArrayFactory::create_<int>('c', {length});

        int *a = new int[length];
        for (int i = 0; i < length; i++) {
            a[i] = i;
        }
        srand(12345);
        std::random_shuffle(a, (a + length - 1));
        for (int i = 0; i < length; i++) {
            indices->p(i, a[i]);
        }
        delete[] a;

        ctx->setInputArray(0, in, true);
        ctx->setInputArray(1, indices, true);
        ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {length}), true);
        return ctx;
    };
    output += helper.runOperationSuit(&gather1d, generator, batch, "Gather - 1d");

    //Gather 2D tests - 2d input, 1d indices -> 2d output
    IntPowerParameters rows("rows", 2, 8, gatherOpPowLimit2, 4);    //2^8 to 2^16 in steps of 4
    PredefinedParameters cols("cols", {32});
    ParametersBatch batch2({&rows, &cols});
    sd::ops::gather gather2;
    DeclarableBenchmark gather2d(gather2, "gather2d");
    auto generator2 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int rows = p.getIntParam("rows");
        int cols = p.getIntParam("cols");
        auto in = NDArrayFactory::create_<float>('c', {rows, cols});
        auto indices = NDArrayFactory::create_<int>('c', {rows});

        int *a = new int[rows];
        for (int i = 0; i < rows; i++) {
            a[i] = i;
        }
        srand(12345);
        std::random_shuffle(a, (a + rows - 1));
        for (int i = 0; i < rows; i++) {
            indices->p(i, a[i]);
        }
        delete[] a;

        ctx->setInputArray(0, in, true);
        ctx->setInputArray(1, indices, true);
        ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {rows, cols}), true);
        return ctx;
    };
    output += helper.runOperationSuit(&gather2d, generator2, batch2, "Gather - 2d");

    //Gather 3D tests - 3d input, 1d indices -> 3d output
    IntPowerParameters sz0("sz0", 2, 8, gatherOpPowLimit3, 4);      //2^8 to 2^12 in steps of 4
    PredefinedParameters sz1("sz1", {32});
    ParametersBatch batch3({&sz0, &sz1});
    sd::ops::gather gather3;
    DeclarableBenchmark gather3d(gather3, "gather3d");
    auto generator3 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        int sz0 = p.getIntParam("sz0");
        int sz1 = p.getIntParam("sz1");
        auto in = NDArrayFactory::create_<float>('c', {sz0, sz1, 512 / sz1});
        auto indices = NDArrayFactory::create_<int>('c', {sz0});

        int *a = new int[sz0];
        for (int i = 0; i < sz0; i++) {
            a[i] = i;
        }
        srand(12345);
        std::random_shuffle(a, (a + sz0 - 1));
        for (int i = 0; i < sz0; i++) {
            indices->p(i, a[i]);
        }
        delete[] a;

        ctx->setInputArray(0, in, true);
        ctx->setInputArray(1, indices, true);
        ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {sz0, sz1, 512 / sz1}), true);
        return ctx;
    };
    output += helper.runOperationSuit(&gather3d, generator3, batch3, "Gather - 3d");
    return output;
}
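// Assign benchmarks where input and output differ in memory layout: c-order to
// f-order (and back) for a fixed element count, plus NCHW<->NHWC permuted views.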
static std::string mismatchedOrdersAssignBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters rows("rows", 2, 2, mismatchedAssignPowLimit, 4);     //2^2 to 2^26 in steps of 4
    BoolParameters cf("cf");
    ParametersBatch batch({&rows, &cf});

    auto generator = PARAMETRIC_XZ() {
        int numElements = 67108864;     //2^26
        int rows = p.getIntParam("rows");
        int cols = numElements / rows;
        bool c = p.getIntParam("cf");
        auto arr = NDArrayFactory::create_<float>(c ? 'c' : 'f', {rows, cols});
        auto arr2 = NDArrayFactory::create_<float>(c ? 'f' : 'c', {rows, cols});
        x.push_back(arr);
        z.push_back(arr2);
    };

    TransformBenchmark tb(transform::AnyOps::Assign, "assign");
    output += helper.runOperationSuit(&tb, generator, batch, "C->F and F->C Assign");

    //Also test: NCHW to NHWC and back
    BoolParameters nchw("nchw");
    ParametersBatch batch2({&nchw});
    auto generator2 = PARAMETRIC_XZ() {
        bool nchw = p.getIntParam("nchw");
        if (nchw) {
            auto orig = NDArrayFactory::create_<float>('c', {16, 32, 64, 64});
            orig->permutei({0, 2, 3, 1});
            x.push_back(orig);
            z.push_back(NDArrayFactory::create_<float>('c', {16, 64, 64, 32}));
        } else {
            auto orig = NDArrayFactory::create_<float>('c', {16, 64, 64, 32});
            orig->permutei({0, 3, 1, 2});
            x.push_back(orig);
            z.push_back(NDArrayFactory::create_<float>('c', {16, 32, 64, 64}));
        }
    };

    TransformBenchmark tb2(transform::AnyOps::Assign, "assign_nchw");
    output += helper.runOperationSuit(&tb2, generator2, batch2, "nchw->nhwc and nhwc->nchw Assign");
    return output;
}
static std::string broadcastOpsMatrixBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Broadcast ops: matrices for rank 3, 4, 5
    for (int rank = 3; rank <= broadcastMatrixRankLimit; rank++) {
        int numAxisTests = -1;
        if (rank == 3) {
            numAxisTests = 3;
        } else if (rank == 4) {
            numAxisTests = 6;
        } else if (rank == 5) {
            numAxisTests = 10;
        }

        IntParameters testNum("testNum", 0, numAxisTests - 1, 1);
        ParametersBatch b({&testNum});
        auto generator = PARAMETRIC_D() {
            int n = p.getIntParam("testNum");
            std::vector<int> axis({});
            switch (n) {
                //rank 3+
                case 0:
                    axis = std::vector<int>({0, 1});
                    break;
                case 1:
                    axis = std::vector<int>({0, 2});
                    break;
                case 2:
                    axis = std::vector<int>({1, 2});
                    break;
                //rank 4+
                case 3:
                    axis = std::vector<int>({0, 3});
                    break;
                case 4:
                    axis = std::vector<int>({1, 3});
                    break;
                case 5:
                    axis = std::vector<int>({2, 3});
                    break;
                //Rank 5
                case 6:
                    axis = std::vector<int>({0, 4});
                    break;
                case 7:
                    axis = std::vector<int>({1, 4});
                    break;
                case 8:
                    axis = std::vector<int>({2, 4});
                    break;
                case 9:
                    axis = std::vector<int>({3, 4});
                    break;
            }

            std::vector<Nd4jLong> shape({});
            std::vector<Nd4jLong> toBcShape({});
            int vectorLength;
            if (rank == 3) {
                shape = std::vector<Nd4jLong>({64, 64, 64});
                toBcShape = std::vector<Nd4jLong>({64, 64, 64});
                vectorLength = 64;
            } else if (rank == 4) {
                shape = std::vector<Nd4jLong>({32, 32, 32, 32});
                toBcShape = std::vector<Nd4jLong>({32, 32, 32, 32});
                vectorLength = 32;
            } else if (rank == 5) {
                shape = std::vector<Nd4jLong>({16, 16, 16, 16, 16});
                toBcShape = std::vector<Nd4jLong>({16, 16, 16, 16, 16});
                vectorLength = 16;
            }

            for (int i = 0; i < rank; i++) {
                if (axis[0] == i || axis[1] == i) {
                    continue;
                }
                toBcShape[i] = 1;
            }

            auto ctx = new Context(1);
            ctx->setInputArray(0, NDArrayFactory::create_<float>('c', shape), true);
            ctx->setInputArray(1, NDArrayFactory::create_<float>('c', toBcShape), true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', shape), true);
            return ctx;
        };

        std::string name;
        name += "Broadcast Matrix Add (Custom) - Rank ";
        name += std::to_string(rank);

        sd::ops::add op;
        DeclarableBenchmark benchmark(op, "add");
        output += helper.runOperationSuit(&benchmark, generator, b, name.c_str());
    }
    return output;
}
static std::string broadcast2dBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    PredefinedParameters rows("rows", {65536});
    IntPowerParameters cols("cols", 2, 2, limit10, 4);      //2^2, 2^6, 2^10
    BoolParameters axis("axis");
    BoolParameters inplace("inplace");
    ParametersBatch batch({&rows, &cols, &axis, &inplace});

    auto generator = PARAMETRIC_D() {
        auto a = p.getIntParam("axis");
        auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), p.getIntParam("cols")});
        auto ctx = new Context(1);
        ctx->setInputArray(0, arr, true);
        if (a == 0) {
            ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), 1}), true);
        } else {
            ctx->setInputArray(1, NDArrayFactory::create_<float>('c', {1, p.getIntParam("cols")}), true);
        }
        if (p.getIntParam("inplace") == 1) {
            ctx->setOutputArray(0, arr);
            ctx->markInplace(true);
        } else {
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), p.getIntParam("cols")}), true);
        }
        return ctx;
    };

    std::string s("add");
    sd::ops::add op;
    DeclarableBenchmark benchmark(op, "add");
    output += helper.runOperationSuit(&benchmark, generator, batch, "Broadcast (Custom) Add - 2d");
    return output;
}
static std::string broadcastBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    //Broadcast ops: vectors for rank 2, 3, 4, 5
    for (int axis = 0; axis <= 1; axis++) {
        PredefinedParameters rows("rows", {65536});
        IntPowerParameters cols("cols", 2, 2, limit10, 4);      //2^2 to 2^10 in steps of 4 - 2^2=4, ..., 2^10=1024
        BoolParameters inplace("inplace");
        ParametersBatch batch({&rows, &cols, &inplace});

        auto generator = PARAMETRIC_XYZ() {
            auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), p.getIntParam("cols")});
            x.push_back(arr);
            if (axis == 0) {
                y.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("rows")}));
            } else {
                y.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("cols")}));
            }
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), p.getIntParam("cols")}));
            }
        };

        std::string s("bAdd"); s += std::to_string(axis); s += "r2";
        BroadcastBenchmark bAdd(broadcast::Add, s, {axis});
        output += helper.runOperationSuit(&bAdd, generator, batch, "Broadcast Add - Rank 2");
    }

    for (int rank = 3; rank <= 5; rank++) {
        for (int axis = 1; axis < rank; axis++) {
            std::vector<Nd4jLong> shape({});
            int vectorLength;
            if (rank == 3) {
                shape = std::vector<Nd4jLong>({32, 128, 128});
                vectorLength = 128;
            } else if (rank == 4) {
                shape = std::vector<Nd4jLong>({16, 64, 64, 64});
                vectorLength = 64;
            } else if (rank == 5) {
                shape = std::vector<Nd4jLong>({16, 48, 48, 48, 48});
                vectorLength = 48;
            }

            ParametersBatch batch({});
            //Note: always inplace here
            auto generator = PARAMETRIC_XYZ() {
                auto arr = NDArrayFactory::create_<float>('c', shape);
                x.push_back(arr);
                y.push_back(NDArrayFactory::create_<float>('c', {vectorLength}));
                z.push_back(arr);
            };

            std::string name("bArr-r"); name += std::to_string(rank); name += "a"; name += std::to_string(axis);
            BroadcastBenchmark bAdd(broadcast::Add, name, {axis});

            std::string n2("Broadcast Add - Rank "); n2 += std::to_string(rank); n2 += " - axis="; n2 += std::to_string(axis);
            output += helper.runOperationSuit(&bAdd, generator, batch, n2.c_str());
        }
    }
    return output;
}
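// Strided sum over column views where no elementwise stride (EWS) can be used;
// the stride-1 case keeps the full array as a baseline.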
static std::string fastStridedReductionNonEws() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters stride("stride", 2, 0, 10, 2);   //2^0=1, ..., 2^10=1024
    ParametersBatch batch({&stride});

    //This is an edge case: technically an EWS *should* be available here
    auto generator1 = PARAMETRIC_XYZ() {
        auto stride = p.getIntParam("stride");
        auto arr = NDArrayFactory::create_<float>('c', {131072 + (stride == 1 ? 0 : 1), stride});
        NDArray *strided;
        if (stride == 1) {
            strided = arr;
        } else {
            IndicesList indices({NDIndex::interval(0, 131072), NDIndex::interval(0, 1)});
            strided = new NDArray(arr->subarray(indices));      //All rows, first column
            delete arr;
        }
        strided->assign(1.0);
        x.push_back(strided);
        y.push_back(nullptr);
        z.push_back(NDArrayFactory::create_<float>(0.0f));
    };

    ReductionBenchmark rbSum(reduce::SameOps::Sum, "stridedSum");
    output += helper.runOperationSuit(&rbSum, (const std::function<void(Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator1), batch, "Strided Sum - No EWS Test 1");

    //No EWS defined for this case
    auto generator2 = PARAMETRIC_XYZ() {
        auto stride = p.getIntParam("stride");
        auto arr = NDArrayFactory::create_<float>('c', {(stride == 1 ? 1 : 2) * 1024, 1024, stride});
        NDArray *strided;
        if (stride == 1) {
            strided = arr;
        } else {
            IndicesList indices({NDIndex::interval(0, 2 * 1024, 2), NDIndex::all(), NDIndex::interval(0, 1)});
            strided = new NDArray(arr->subarray(indices));
            delete arr;
        }
        strided->assign(1.0);
        x.push_back(strided);
        y.push_back(nullptr);
        z.push_back(NDArrayFactory::create_<float>(0.0f));
    };

    ReductionBenchmark rbSum2(reduce::SameOps::Sum, "stridedSumNoEWS");
    output += helper.runOperationSuit(&rbSum2, (const std::function<void(Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator2), batch, "Strided Sum - No EWS Test 2");
    return output;
}
static std::string fastStridedReductionIrregular() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters length("length", 2, 12, stridedReductionPowLimit, 4);    //2^12 to 2^20 in steps of 4
    PredefinedParameters stride("stride", {26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
                                           122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
                                           1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028});
    ParametersBatch batch({&length, &stride});

    auto generator = PARAMETRIC_XYZ() {
        auto stride = p.getIntParam("stride");
        auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length"), stride});
        NDArray *strided;
        if (stride == 1) {
            strided = arr;
        } else {
            IndicesList indices({NDIndex::all(), NDIndex::interval(0, 1)});
            strided = new NDArray(arr->subarray(indices));      //All rows, first column
            delete arr;
        }
        strided->assign(1.0);
        x.push_back(strided);
        y.push_back(nullptr);
        z.push_back(NDArrayFactory::create_<float>(0.0f));
    };

    ReductionBenchmark rbSum(reduce::SameOps::Sum, "stridedSum");
    output += helper.runOperationSuit(&rbSum, (const std::function<void(Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, "Strided Sum - Irregular Strides");
    return output;
}
static std::string fastStridedReductionsRegular() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters length("length", 2, 12, stridedReductionPowLimit, 4);    //2^12 to 2^20 in steps of 4
    IntPowerParameters stride("stride", 2, 0, 10);      //2^0=1, ..., 2^10=1024
    ParametersBatch batch({&length, &stride});

    auto generator = PARAMETRIC_XYZ() {
        auto stride = p.getIntParam("stride");
        auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length"), stride});
        NDArray *strided;
        if (stride == 1) {
            strided = arr;
        } else {
            IndicesList indices({NDIndex::all(), NDIndex::point(0)});
            strided = new NDArray(arr->subarray(indices));      //All rows, first column
            delete arr;
        }
        strided->assign(1.0);
        x.push_back(strided);
        y.push_back(nullptr);
        // z.push_back(NDArrayFactory::create_<float>(0.0f));
        z.push_back(NDArrayFactory::create_<float>('c', {1}));
    };

    ReductionBenchmark rbSum(reduce::SameOps::Sum, "Strided Sum");
    output += helper.runOperationSuit(&rbSum, (const std::function<void(Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, "Strided Sum - Regular Strides (powers of 2)");

    auto generator3 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        auto stride = p.getIntParam("stride");
        auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length"), stride});
        NDArray *strided;
        if (stride == 1) {
            strided = arr;
        } else {
            IndicesList indices({NDIndex::all(), NDIndex::point(0)});
            strided = new NDArray(arr->subarray(indices));      //All rows, first column
            delete arr;
        }
        strided->assign(1.0);
        ctx->setInputArray(0, strided, true);
        ctx->setOutputArray(0, NDArrayFactory::create_<Nd4jLong>('c', {1}), true);
        auto iargs = new Nd4jLong[1];
        iargs[0] = 0;
        ctx->setIArguments(iargs, 1);
        delete[] iargs;
        return ctx;
    };

    sd::ops::argmax opArgmax;
    DeclarableBenchmark dbArgmax(opArgmax, "stridedArgmax");
    output += helper.runOperationSuit(&dbArgmax, generator3, batch, "Strided Argmax");
    return output;
}
static std::string fastReduceAlongDimBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    int length[] = {1024 * 1024, 64 * 1024 * 1024};
    int powLimit[] = {10, 20, 26};
    int powStep[] = {2, 2, 4};

    for (int i = 0; i < limit3; i++) {
        IntPowerParameters rows("rows", 2, 0, powLimit[i], powStep[i]);
        BoolParameters dim("dim");
        ParametersBatch batch({&rows, &dim});

        auto generator = PARAMETRIC_XYZ() {
            int rows = p.getIntParam("rows");
            int cols = length[i] / rows;
            int dim = p.getIntParam("dim");
            auto arr = NDArrayFactory::create_<float>('c', {rows, cols});
            x.push_back(arr);
            y.push_back(NDArrayFactory::create_<Nd4jLong>(dim));
            NDArray *result;
            if (dim == 0) {
                result = NDArrayFactory::create_<float>('c', {cols});
            } else {
                result = NDArrayFactory::create_<float>('c', {rows});
            }
            z.push_back(result);
        };

        ReductionBenchmark rbSum(reduce::SameOps::Sum, "sum");
        ReductionBenchmark rbMax(reduce::SameOps::Max, "max");

        std::string s1("Sum Along Dimension - ");
        s1 += std::to_string(length[i]);
        output += helper.runOperationSuit(&rbSum, (const std::function<void(Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, s1.c_str());

        auto generator3 = PARAMETRIC_D() {
            auto ctx = new Context(1);
            int rows = p.getIntParam("rows");
            int cols = length[i] / rows;
            int dim = p.getIntParam("dim");
            auto arr = NDArrayFactory::create_<float>('c', {rows, cols});

            Nd4jLong *dimArg = new Nd4jLong[1];
            dimArg[0] = dim;
            ctx->setIArguments(dimArg, 1);
            delete[] dimArg;

            ctx->setInputArray(0, arr, true);
            NDArray *result;
            if (dim == 0) {
                result = NDArrayFactory::create_<Nd4jLong>('c', {cols});
            } else {
                result = NDArrayFactory::create_<Nd4jLong>('c', {rows});
            }
            ctx->setOutputArray(0, result, true);
            return ctx;
        };

        std::string s5("Argmax Along Dimension - ");
        s5 += std::to_string(length[i]);
        sd::ops::argmax opArgmax;
        DeclarableBenchmark dbArgmax(opArgmax, "Argmax");
        output += helper.runOperationSuit(&dbArgmax, generator3, batch, s5.c_str());
    }
    return output;
}
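// Full-array reductions: sum to a scalar and argmax over the whole (flattened) input.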
static std::string fastReduceToScalarBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters length("length", 2, 10, reduceScalarPowLimit, 4);    //2^10 to 2^26 in steps of 4
    ParametersBatch batch({&length});

    auto generator = PARAMETRIC_XYZ() {
        auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
        x.push_back(arr);
        y.push_back(nullptr);
        z.push_back(NDArrayFactory::create_<float>(0.0f));
    };

    ReductionBenchmark rbSum(reduce::SameOps::Sum, "sum");
    output += helper.runOperationSuit(&rbSum, (const std::function<void(Parameters &, ResultSet &, ResultSet &, ResultSet &)>)(generator), batch, "Sum - Full Array Reduction");

    //Index reduction
    sd::ops::argmax opArgmax;
    DeclarableBenchmark dbArgmax(opArgmax, "Argmax");
    auto generator3 = PARAMETRIC_D() {
        auto ctx = new Context(1);
        ctx->setInputArray(0, NDArrayFactory::create_<float>('c', {p.getIntParam("length")}), true);
        ctx->setInputArray(1, NDArrayFactory::create_<Nd4jLong>((Nd4jLong) 0), true);
        ctx->setOutputArray(0, NDArrayFactory::create_<Nd4jLong>(0), true);
        return ctx;
    };
    output += helper.runOperationSuit(&dbArgmax, generator3, batch, "Argmax Full Array Reduction");
    return output;
}
static std::string fastNonEwsTransformBenchmark() {
    std::string output;
    BenchmarkHelper helper(wIterations, rIterations);

    IntPowerParameters rowcol("rowcol", 2, 2, nonEwsPowLimit, 4);   //2^2 to 2^10 in steps of 4
    BoolParameters inplace("inplace");
    ParametersBatch batch({&rowcol, &inplace});

    auto generator = PARAMETRIC_XZ() {
        int r = p.getIntParam("rowcol");
        auto arr = NDArrayFactory::create_<float>('c', {r, r + 1});
        IndicesList indices({NDIndex::all(), NDIndex::interval(0, r - 1)});
        auto view = new NDArray(arr->subarray(indices));
        //nd4j_printf("VIEW ARRAY: rows=%lld, columns=%lld", view->sizeAt(0), view->sizeAt(1));
        x.push_back(view);
        if (p.getIntParam("inplace") == 1) {
            z.push_back(view);
        } else {
            z.push_back(NDArrayFactory::create_<float>('c', {view->sizeAt(0), view->sizeAt(1)}));
        }
        delete arr;
    };

    ScalarBenchmark sbLRelu(scalar::Ops::LeakyRELU, "LeakyRELU_View");
    sbLRelu.setY(NDArrayFactory::create_<float>(0.0));
    TransformBenchmark tbExp(transform::StrictOps::Exp, "exp view");

    output += helper.runOperationSuit(&sbLRelu, generator, batch, "LeakyRELU View");
    output += helper.runOperationSuit(&tbExp, generator, batch, "Exp View");
    return output;
}
    static std::string fastPairwiseBenchmark() {
        std::string output;
        BenchmarkHelper helper(wIterations, rIterations);

        IntPowerParameters length("length", 2, 10, pairwisePowLimit, 4);        //2^10 to 2^26 in steps of 4 -> max is 512mb
        BoolParameters inplace("inplace");

        ParametersBatch batch({&length, &inplace});

        auto generator = PARAMETRIC_XYZ() {
            auto arr1 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            auto arr2 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            x.push_back(arr1);
            y.push_back(arr2);
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr1);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("length")}));
            }
        };

        PairwiseBenchmark pb1(pairwise::Ops::Add, "Add");
        output += helper.runOperationSuit(&pb1, generator, batch, "Pairwise Add");

        PairwiseBenchmark pb2(pairwise::Ops::Multiply, "Multiply");
        output += helper.runOperationSuit(&pb2, generator, batch, "Pairwise Multiply");

        return output;
    }
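    // Benchmarks computationally heavy transforms: Erf as a strict transform, plus the declarable
    // polygamma and betainc custom ops with constant-filled inputs.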
    static std::string heavyTransformsBenchmark() {
        std::string output;
        BenchmarkHelper helper(wIterations, rIterations);

        IntPowerParameters length("length", 2, 10, heavyPowLimit, 4);        //2^10 to 2^22, steps of 4
        BoolParameters inplace("inplace");

        ParametersBatch batch({&length, &inplace});

        auto generator = PARAMETRIC_XZ() {
            auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            arr->assign(1.0);
            x.push_back(arr);
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("length")}));
            }
        };

        //Ops to test: erf (transform), betainc (custom), polygamma, synthetic ops?
        TransformBenchmark erf(transform::StrictOps::Erf, "Erf");
        output += helper.runOperationSuit(&erf, generator, batch, "Error Function (Erf)");

        ParametersBatch batch2({&length});
        sd::ops::polygamma op1;
        DeclarableBenchmark pg(op1, "polygamma");
        auto generator2 = PARAMETRIC_D() {
            auto ctx = new Context(1);
            auto in0 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            in0->assign(0.25);
            auto in1 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            in1->assign(0.5);
            ctx->setInputArray(0, in0, true);
            ctx->setInputArray(1, in1, true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {p.getIntParam("length")}), true);
            return ctx;
        };

        IntPowerParameters lengthBetaInc("length", 2, 10, heavyPowLimit, 4);        //2^10 to 2^22 in steps of 4
        ParametersBatch batch3({&lengthBetaInc});
        sd::ops::betainc op2;
        DeclarableBenchmark binc(op2, "betainc");
        auto generator3 = PARAMETRIC_D() {
            auto ctx = new Context(1);
            auto in0 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            in0->assign(0.25);
            auto in1 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            in1->assign(0.5);
            auto in2 = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            in2->assign(0.75);
            ctx->setInputArray(0, in0, true);
            ctx->setInputArray(1, in1, true);
            ctx->setInputArray(2, in2, true);
            ctx->setOutputArray(0, NDArrayFactory::create_<float>('c', {p.getIntParam("length")}), true);
            return ctx;
        };

        output += helper.runOperationSuit(&pg, generator2, batch2, "PolyGamma Function");
        output += helper.runOperationSuit(&binc, generator3, batch3, "Incomplete Beta Function (BetaInc)");

        return output;
    }
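    // Benchmarks transforms of intermediate cost (tanh, GELU) on flat arrays; a 2D rows x cols
    // generator is also set up for the softmax case, which is currently commented out.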
    static std::string intermediateTransformsBenchmark() {
        std::string output;
        //Non-inplace: 2x 2^22 elements FP32 -> 32MB
        BenchmarkHelper helper(wIterations, rIterations);

        IntPowerParameters length("length", 2, 10, intermediateTransformPowLimit, 4);        //2^10 to 2^22 in steps of 4
        BoolParameters inplace("inplace");

        ParametersBatch batch({&length, &inplace});

        auto generator = PARAMETRIC_XZ() {
            auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            arr->assign(1.0);
            x.push_back(arr);
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("length")}));
            }
        };

        TransformBenchmark tbTanh(transform::StrictOps::Tanh, "tanh");
        TransformBenchmark tbGelu(transform::StrictOps::GELU, "gelu");

        output += helper.runOperationSuit(&tbTanh, generator, batch, "Tanh");
        output += helper.runOperationSuit(&tbGelu, generator, batch, "gelu");

        //2x 1024 cols x 2^18 rows = 2GB
        IntPowerParameters rows("rows", 2, 10, intermediateTransformPowLimit2, 4);
        PredefinedParameters cols("cols", {4, 128, 1024});

        ParametersBatch batch2({&rows, &cols, &inplace});

        auto generator2 = PARAMETRIC_XZ() {
            auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), p.getIntParam("cols")});
            arr->assign(1.0);
            x.push_back(arr);
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("rows"), p.getIntParam("cols")}));
            }
        };

        //TransformBenchmark tbSoftmax(transform::StrictOps::SoftMax, "softmax");
        //output += helper.runOperationSuit(&tbSoftmax, generator2, batch2, "Softmax");

        return output;
    }
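    // Benchmarks cheap elementwise transforms (LeakyRELU as a scalar op, Abs, Exp) on flat float arrays.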
    static std::string fastTransformsBenchmark() {
        std::string output;
        BenchmarkHelper helper(wIterations, rIterations);

        IntPowerParameters length("length", 2, 10, transformBenchmarkPowLimit, 4);        //2^10 to 2^26 in steps of 4 - 2^10, 2^14, ..., 2^26
        BoolParameters inplace("inplace");

        ParametersBatch batch({&length, &inplace});

        auto generator = PARAMETRIC_XZ() {
            auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            arr->assign(1.0);
            x.push_back(arr);
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("length")}));
            }
        };

        ScalarBenchmark sbLRelu(scalar::Ops::LeakyRELU, "LeakyRELU");
        sbLRelu.setY(NDArrayFactory::create_<float>(0.0));

        TransformBenchmark tbAbs(transform::SameOps::Abs, "abs");
        TransformBenchmark tbExp(transform::StrictOps::Exp, "exp");

        output += helper.runOperationSuit(&sbLRelu, generator, batch, "LeakyRELU");
        output += helper.runOperationSuit(&tbAbs, generator, batch, "Abs");
        output += helper.runOperationSuit(&tbExp, generator, batch, "Exp");

        return output;
    }
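    // Benchmarks scalar ops (add, divide, pow against a constant) on flat float arrays, in-place and out-of-place.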
    static std::string fastScalarBenchmark() {
        std::string output;
        BenchmarkHelper helper(wIterations, rIterations);

        IntPowerParameters length("length", 2, 10, scalarBenchmarkPowLimit, 4);        //2^10 to 2^26 in steps of 4 - 2^10, 2^14, ..., 2^26
        BoolParameters inplace("inplace");

        ParametersBatch batch({&length, &inplace});

        auto generator = PARAMETRIC_XZ() {
            auto arr = NDArrayFactory::create_<float>('c', {p.getIntParam("length")});
            arr->assign(1.0);
            x.push_back(arr);
            if (p.getIntParam("inplace") == 1) {
                z.push_back(arr);
            } else {
                z.push_back(NDArrayFactory::create_<float>('c', {p.getIntParam("length")}));
            }
        };

        ScalarBenchmark sbAdd(scalar::Ops::Add, "sAdd");
        ScalarBenchmark sbDiv(scalar::Ops::Divide, "sDiv");
        ScalarBenchmark sbPow(scalar::Ops::Pow, "sPow");

        sbAdd.setY(NDArrayFactory::create_<float>(3.14159265359));
        sbDiv.setY(NDArrayFactory::create_<float>(3.14159265359));
        sbPow.setY(NDArrayFactory::create_<float>(3.14159265359));

        output += helper.runOperationSuit(&sbAdd, generator, batch, "Scalar Addition - x.add(3.14159265359) - F32");
        output += helper.runOperationSuit(&sbDiv, generator, batch, "Scalar Division - x.div(3.14159265359) - F32");
        output += helper.runOperationSuit(&sbPow, generator, batch, "Scalar Power - x.pow(3.14159265359) - F32");

        return output;
    }
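    // Wall-clock helpers used to report how long each benchmark group takes.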
    static long nowMs() {
        auto s = std::chrono::system_clock::now().time_since_epoch();
        auto v = std::chrono::duration_cast<std::chrono::milliseconds>(s).count();
        return v;
    }

    static long duration(long start) {
        return nowMs() - start;
    }

    static long done(long start) {
        long dur = duration(start);
        nd4j_printf("Done: %i ms\n", (int) dur);
        return nowMs();
    }
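    // Entry point: runs every benchmark group in sequence, printing progress and per-group wall-clock
    // time, and returns the concatenated report.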
    std::string FullBenchmarkSuit::runSuit() {
        std::string result;
        long start = nowMs();

        // set 1
        nd4j_printf("Running FullBenchmarkSuite.fastScalarBenchmark\n", "");
        result += fastScalarBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastTransformsBenchmark\n", "");
        result += fastTransformsBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.intermediateTransformsBenchmark\n", "");
        result += intermediateTransformsBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastPairwiseBenchmark\n", "");
        result += fastPairwiseBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.heavyTransformsBenchmark\n", "");
        result += heavyTransformsBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastNonEwsTransformBenchmark\n", "");
        result += fastNonEwsTransformBenchmark();
        start = done(start);

        // set 2
        nd4j_printf("Running FullBenchmarkSuite.fastReduceToScalarBenchmark\n", "");
        result += fastReduceToScalarBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastReduceAlongDimBenchmark\n", "");
        result += fastReduceAlongDimBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastStridedReductionsRegular\n", "");
        result += fastStridedReductionsRegular();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastStridedReductionIrregular\n", "");
        result += fastStridedReductionIrregular();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.fastStridedReductionNonEws\n", "");
        result += fastStridedReductionNonEws();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.broadcastBenchmark\n", "");
        result += broadcastBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.broadcast2dBenchmark\n", "");
        result += broadcast2dBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.broadcastOpsMatrixBenchmark\n", "");
        result += broadcastOpsMatrixBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.mismatchedOrdersAssignBenchmark\n", "");
        result += mismatchedOrdersAssignBenchmark();
        start = done(start);

        // set 3
        nd4j_printf("Running FullBenchmarkSuite.gatherOpBenchmark\n", "");
        result += gatherOpBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.scatterOpBenchmark\n", "");
        result += scatterOpBenchmark();
        start = done(start);

        // set 4
        nd4j_printf("Running FullBenchmarkSuite.gemmRegularBenchmark\n", "");
        result += gemmRegularBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.gemmIrregularBenchmark\n", "");
        result += gemmIrregularBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.rngBenchmark\n", "");
        result += rngBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.conv2dBenchmark\n", "");
        result += conv2dBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.pool2dBenchmark\n", "");
        result += pool2dBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.batchnormBenchmark\n", "");
        result += batchnormBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.lstmBenchmark\n", "");
        result += lstmBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.conv3dBenchmark\n", "");
        result += conv3dBenchmark();
        start = done(start);
        nd4j_printf("Running FullBenchmarkSuite.maxPool3DBenchmark\n", "");
        result += maxPool3DBenchmark();
        start = done(start);

        // nd4j_printf("Running FullBenchmarkSuite.layerNormBenchmark\n", "");
        // result += layerNormBenchmark();
        // start = done(start);

        return result;
    }
}