2019-07-12 07:21:15 +02:00
/*******************************************************************************
* Copyright ( c ) 2015 - 2018 Skymind , Inc .
*
* This program and the accompanying materials are made available under the
* terms of the Apache License , Version 2.0 which is available at
* https : //www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS , WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied . See the
* License for the specific language governing permissions and limitations
* under the License .
*
* SPDX - License - Identifier : Apache - 2.0
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
//
// @author raver119@gmail.com
//
# include <ops/declarable/CustomOperations.h>
# include "performance/benchmarking/LightBenchmarkSuit.h"
2019-11-13 15:15:18 +01:00
# ifdef RELEASE_BUILD
# define WARMUP 5
# define NUM_ITER 100
2019-07-12 07:21:15 +02:00
# else
2019-11-13 15:15:18 +01:00
# define WARMUP 5
# define NUM_ITER 100
2019-07-12 07:21:15 +02:00
# endif
namespace nd4j {
template < typename T >
static std : : string transformBenchmark ( ) {
std : : string output ;
output + = " transformBenchmark " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
IntPowerParameters length ( " length " , 2 , 8 , 20 , 4 ) ; //2^8, 2^12, 2^16, 2^20 - 4MB
BoolParameters inplace ( " inplace " ) ;
ParametersBatch batch ( { & length , & inplace } ) ;
auto generator = PARAMETRIC_XZ ( ) {
auto arr = NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ;
arr - > assign ( 1.0 ) ;
x . push_back ( arr ) ;
if ( p . getIntParam ( " inplace " ) = = 1 ) {
z . push_back ( arr ) ;
} else {
z . push_back ( NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ) ;
}
} ;
ScalarBenchmark sbRelu ( scalar : : Ops : : RELU , " RELU " ) ;
sbRelu . setY ( NDArrayFactory : : create_ < T > ( 0.0 ) ) ;
TransformBenchmark tbSigmoid ( transform : : StrictOps : : Sigmoid , " sigmoid " ) ;
2019-07-16 17:48:40 +02:00
//TransformBenchmark tbSoftmax(transform::StrictOps::SoftMax, "softmax");
2019-07-12 07:21:15 +02:00
output + = helper . runOperationSuit ( & sbRelu , generator , batch , " RELU " ) ;
output + = helper . runOperationSuit ( & tbSigmoid , generator , batch , " Sigmoid " ) ;
2019-07-16 17:48:40 +02:00
//output += helper.runOperationSuit(&tbSigmoid, generator, batch, "Softmax");
2019-07-12 07:21:15 +02:00
return output ;
}
template < typename T >
static std : : string scalarBenchmark ( ) {
std : : string output ;
output + = " scalarBenchmark " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
IntPowerParameters length ( " length " , 2 , 8 , 20 , 4 ) ; //2^8, 2^12, 2^16, 2^20
BoolParameters inplace ( " inplace " ) ;
ParametersBatch batch ( { & length , & inplace } ) ;
auto generator = PARAMETRIC_XZ ( ) {
auto arr = NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ;
arr - > assign ( 1.0 ) ;
x . push_back ( arr ) ;
if ( p . getIntParam ( " inplace " ) = = 1 ) {
z . push_back ( arr ) ;
} else {
z . push_back ( NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ) ;
}
} ;
ScalarBenchmark sbAdd ( scalar : : Ops : : Add , " sAdd " ) ;
ScalarBenchmark sbDiv ( scalar : : Ops : : Divide , " sDiv " ) ;
ScalarBenchmark sbPow ( scalar : : Ops : : Pow , " sPow " ) ;
sbAdd . setY ( NDArrayFactory : : create_ < T > ( 3.14159265359 ) ) ;
sbDiv . setY ( NDArrayFactory : : create_ < T > ( 3.14159265359 ) ) ;
sbPow . setY ( NDArrayFactory : : create_ < T > ( 3.14159265359 ) ) ;
output + = helper . runOperationSuit ( & sbAdd , generator , batch , " Scalar Addition - x.add(3.14159265359) " ) ;
output + = helper . runOperationSuit ( & sbDiv , generator , batch , " Scalar Division - x.div(3.14159265359) " ) ;
output + = helper . runOperationSuit ( & sbPow , generator , batch , " Scalar Power - x.pow(3.14159265359) " ) ;
return output ;
}
template < typename T >
static std : : string pairwiseBenchmark ( ) {
std : : string output ;
output + = " pairwiseBenchmark " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
IntPowerParameters length ( " length " , 2 , 8 , 20 , 4 ) ; //2^4 to 2^20 in steps of 4 - 2^4, 2^8, 2^16, 2^20
BoolParameters inplace ( " inplace " ) ;
ParametersBatch batch ( { & length , & inplace } ) ;
auto generator = PARAMETRIC_XYZ ( ) {
auto arr1 = NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ;
auto arr2 = NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ;
x . push_back ( arr1 ) ;
y . push_back ( arr2 ) ;
if ( p . getIntParam ( " inplace " ) = = 1 ) {
z . push_back ( arr1 ) ;
} else {
z . push_back ( NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ) ;
}
} ;
PairwiseBenchmark pb1 ( pairwise : : Ops : : Add , " Add " ) ;
output + = helper . runOperationSuit ( & pb1 , generator , batch , " Pairwise Add " ) ;
PairwiseBenchmark pb2 ( pairwise : : Ops : : Divide , " Divide " ) ;
output + = helper . runOperationSuit ( & pb2 , generator , batch , " Pairwise Divide " ) ;
return output ;
}
static std : : string mismatchedOrderAssign ( ) {
std : : string output ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
IntPowerParameters rows ( " rows " , 2 , 8 , 20 , 4 ) ; //2^8, 2^12, 2^16, 2^20
BoolParameters cf ( " cf " ) ;
ParametersBatch batch ( { & rows , & cf } ) ;
auto generator = PARAMETRIC_XZ ( ) {
int numElements = 4194304 ; //2^24
int rows = p . getIntParam ( " rows " ) ;
int cols = numElements / rows ;
bool c = p . getIntParam ( " cf " ) ;
auto arr = NDArrayFactory : : create_ < float > ( c ? ' c ' : ' f ' , { rows , cols } ) ;
auto arr2 = NDArrayFactory : : create_ < float > ( c ? ' f ' : ' c ' , { rows , cols } ) ;
x . push_back ( arr ) ;
z . push_back ( arr2 ) ;
} ;
TransformBenchmark tb ( transform : : AnyOps : : Assign , " assign " ) ;
output + = helper . runOperationSuit ( & tb , generator , batch , " C->F and F->C Assign F32 " ) ;
//Also test: NCHW to NHWC and back
BoolParameters nchw ( " nchw " ) ;
int mb = 8 ;
int hw = 64 ;
int c = 3 ;
ParametersBatch batch2 ( { & nchw } ) ;
auto generator2 = PARAMETRIC_XZ ( ) {
bool nchw = p . getIntParam ( " nchw " ) ;
if ( nchw ) {
auto orig = NDArrayFactory : : create_ < float > ( ' c ' , { mb , c , hw , hw } ) ;
orig - > permutei ( { 0 , 2 , 3 , 1 } ) ;
x . push_back ( orig ) ;
z . push_back ( NDArrayFactory : : create_ < float > ( ' c ' , { mb , hw , hw , c } ) ) ;
} else {
auto orig = NDArrayFactory : : create_ < float > ( ' c ' , { mb , hw , hw , c } ) ;
orig - > permutei ( { 0 , 3 , 1 , 2 } ) ;
x . push_back ( orig ) ;
z . push_back ( NDArrayFactory : : create_ < float > ( ' c ' , { mb , c , hw , hw } ) ) ;
}
} ;
TransformBenchmark tb2 ( transform : : AnyOps : : Assign , " assign_nchw " ) ;
output + = helper . runOperationSuit ( & tb2 , generator2 , batch2 , " nchw->nhwc and nhwc->nchw Assign FP32 " ) ;
return output ;
}
template < typename T >
static std : : string gemmBenchmark ( ) {
std : : string output ;
output + = " gemm " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
for ( int o = 0 ; o < = 1 ; o + + ) {
char resultOrder = ( o = = 0 ? ' f ' : ' c ' ) ;
IntPowerParameters sz ( " sz " , 2 , 4 , 10 , 2 ) ; //2^4=16, ..., 2^10=1024 -> 4 elements
ParametersBatch b ( { & sz } ) ;
auto generator = PARAMETRIC_XYZ ( ) {
auto a = p . getIntParam ( " sz " ) ;
auto b = p . getIntParam ( " sz " ) ;
auto c = p . getIntParam ( " sz " ) ;
std : : vector < Nd4jLong > shapeA ;
std : : vector < Nd4jLong > shapeB ;
shapeA = { a , b } ;
shapeB = { b , c } ;
auto A = NDArrayFactory : : create_ < T > ( ' c ' , shapeA ) ;
auto B = NDArrayFactory : : create_ < T > ( ' c ' , shapeB ) ;
auto C = NDArrayFactory : : create_ < T > ( resultOrder , { a , c } ) ;
x . push_back ( A ) ;
y . push_back ( B ) ;
z . push_back ( C ) ;
} ;
std : : string n ;
n + = " Gemm - cOrder= " ;
n + = resultOrder ;
MatrixBenchmark mb ( 1.0 , 0.0 , false , false , n ) ;
output + = helper . runOperationSuit ( & mb , generator , b , n . c_str ( ) ) ;
}
return output ;
}
template < typename T >
static std : : string reduceFullBenchmark ( ) {
std : : string output ;
output + = " reduceFullBenchmark " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
IntPowerParameters length ( " length " , 2 , 8 , 20 , 4 ) ; //2^8, 2^12, 2^16, 2^20
ParametersBatch batch ( { & length } ) ;
auto generator = PARAMETRIC_XYZ ( ) {
auto arr = NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) ;
x . push_back ( arr ) ;
y . push_back ( nullptr ) ;
z . push_back ( NDArrayFactory : : create_ < T > ( 0.0f ) ) ;
} ;
ReductionBenchmark rbSum ( reduce : : SameOps : : Sum , " sum " ) ;
ReductionBenchmark rbProd ( reduce : : SameOps : : Prod , " prod " ) ;
ReductionBenchmark rbMax ( reduce : : SameOps : : Max , " max " ) ;
output + = helper . runOperationSuit ( & rbSum , ( const std : : function < void ( Parameters & , ResultSet & , ResultSet & , ResultSet & ) > ) ( generator ) , batch , " Sum - Full Array Reduction " ) ;
output + = helper . runOperationSuit ( & rbProd , ( const std : : function < void ( Parameters & , ResultSet & , ResultSet & , ResultSet & ) > ) ( generator ) , batch , " Product - Full Array Reduction " ) ;
output + = helper . runOperationSuit ( & rbMax , ( const std : : function < void ( Parameters & , ResultSet & , ResultSet & , ResultSet & ) > ) ( generator ) , batch , " Maximum - Full Array Reduction " ) ;
//Index reduction
nd4j : : ops : : argmax opArgmax ;
DeclarableBenchmark dbArgmax ( opArgmax , " Argmax " ) ;
auto generator3 = PARAMETRIC_D ( ) {
auto ctx = new Context ( 1 ) ;
ctx - > setInputArray ( 0 , NDArrayFactory : : create_ < T > ( ' c ' , { p . getIntParam ( " length " ) } ) , true ) ;
ctx - > setInputArray ( 1 , NDArrayFactory : : create_ < Nd4jLong > ( ( Nd4jLong ) 0 ) , true ) ;
ctx - > setOutputArray ( 0 , NDArrayFactory : : create_ < Nd4jLong > ( 0 ) , true ) ;
return ctx ;
} ;
output + = helper . runOperationSuit ( & dbArgmax , generator3 , batch , " Argmax Full Array Reduction " ) ;
return output ;
}
template < typename T >
static std : : string reduceDimBenchmark ( ) {
std : : string output ;
output + = " reduceDimBenchmark " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
int length [ ] = { 1024 * 1024 } ;
int pow [ ] = { 10 } ;
for ( int i = 0 ; i < 1 ; i + + ) {
IntPowerParameters rows ( " rows " , 2 , 0 , pow [ i ] , 2 ) ;
BoolParameters dim ( " dim " ) ;
ParametersBatch batch ( { & rows , & dim } ) ;
auto generator = PARAMETRIC_XYZ ( ) {
int rows = p . getIntParam ( " rows " ) ;
int cols = length [ i ] / rows ;
int dim = p . getIntParam ( " dim " ) ;
auto arr = NDArrayFactory : : create_ < T > ( ' c ' , { rows , cols } ) ;
x . push_back ( arr ) ;
y . push_back ( NDArrayFactory : : create_ < Nd4jLong > ( dim ) ) ;
NDArray * result ;
if ( dim = = 0 ) {
result = NDArrayFactory : : create_ < T > ( ' c ' , { cols } ) ;
} else {
result = NDArrayFactory : : create_ < T > ( ' c ' , { rows } ) ;
}
z . push_back ( result ) ;
} ;
ReductionBenchmark rbSum ( reduce : : SameOps : : Sum , " sum " ) ;
ReductionBenchmark rbMax ( reduce : : SameOps : : Max , " max " ) ;
std : : string s1 ( " Sum Along Dimension - " ) ;
s1 + = std : : to_string ( length [ i ] ) ;
std : : string s3 ( " Maximum Along Dimension - " ) ;
s3 + = std : : to_string ( length [ i ] ) ;
output + = helper . runOperationSuit ( & rbSum , ( const std : : function < void ( Parameters & , ResultSet & , ResultSet & , ResultSet & ) > ) ( generator ) , batch , s1 . c_str ( ) ) ;
output + = helper . runOperationSuit ( & rbMax , ( const std : : function < void ( Parameters & , ResultSet & , ResultSet & , ResultSet & ) > ) ( generator ) , batch , s3 . c_str ( ) ) ;
auto generator3 = PARAMETRIC_D ( ) {
auto ctx = new Context ( 1 ) ;
int rows = p . getIntParam ( " rows " ) ;
int cols = length [ i ] / rows ;
int dim = p . getIntParam ( " dim " ) ;
auto arr = NDArrayFactory : : create_ < T > ( ' c ' , { rows , cols } ) ;
auto dimArg = new Nd4jLong [ 1 ] ;
dimArg [ 0 ] = dim ;
ctx - > setIArguments ( dimArg , 1 ) ;
delete [ ] dimArg ;
ctx - > setInputArray ( 0 , arr , true ) ;
NDArray * result ;
if ( dim = = 0 ) {
result = NDArrayFactory : : create_ < Nd4jLong > ( ' c ' , { cols } ) ;
} else {
result = NDArrayFactory : : create_ < Nd4jLong > ( ' c ' , { rows } ) ;
}
ctx - > setOutputArray ( 0 , result , true ) ;
return ctx ;
} ;
std : : string s5 ( " Argmax Along Dimension - " ) ;
s5 + = std : : to_string ( length [ i ] ) ;
nd4j : : ops : : argmax opArgmax ;
DeclarableBenchmark dbArgmax ( opArgmax , " Argmax " ) ;
output + = helper . runOperationSuit ( & dbArgmax , generator3 , batch , s5 . c_str ( ) ) ;
}
return output ;
}
template < typename T >
static std : : string conv2d ( ) {
std : : string output ;
output + = " conv2d " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
//Convolution2D op
BoolParameters nhwc ( " nhwc " ) ;
PredefinedParameters k ( " k " , { 2 , 3 } ) ;
ParametersBatch batch ( { & nhwc , & k } ) ;
nd4j : : ops : : conv2d conv2d ;
DeclarableBenchmark benchmark ( conv2d , " conv2d " ) ;
int hw = 64 ;
auto generator = PARAMETRIC_D ( ) {
auto ctx = new Context ( 1 ) ;
int n = p . getIntParam ( " nhwc " ) ;
int khw = p . getIntParam ( " k " ) ;
if ( n = = 0 ) {
auto input = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , 3 , hw , hw } ) ;
auto output = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , 3 , hw , hw } ) ;
ctx - > setInputArray ( 0 , input , true ) ;
ctx - > setOutputArray ( 0 , output , true ) ;
} else {
auto input = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , hw , hw , 3 } ) ;
auto output = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , hw , hw , 3 } ) ;
ctx - > setInputArray ( 0 , input , true ) ;
ctx - > setOutputArray ( 0 , output , true ) ;
}
auto b = NDArrayFactory : : create_ < T > ( ' c ' , { 3 } ) ;
auto w = NDArrayFactory : : create_ < T > ( ' c ' , { khw , khw , 3 , 3 } ) ; // [kH, kW, iC, oC] always
ctx - > setInputArray ( 1 , w , true ) ;
ctx - > setInputArray ( 2 , b , true ) ;
auto args = new Nd4jLong [ 10 ] ;
args [ 0 ] = args [ 1 ] = khw ; //Kernel
args [ 2 ] = args [ 3 ] = 1 ; //Stride
args [ 4 ] = args [ 5 ] = 0 ; //Pad
args [ 6 ] = args [ 7 ] = 1 ; //Dilation
args [ 8 ] = 1 ; //SAME
args [ 9 ] = n ; //0-nchw, 1=nhwc
ctx - > setIArguments ( args , 10 ) ;
delete [ ] args ;
return ctx ;
} ;
output + = helper . runOperationSuit ( & benchmark , generator , batch , " Conv2d " ) ;
return output ;
}
template < typename T >
static std : : string pool2d ( ) {
std : : string output ;
output + = " pool2d " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
//Convolution2D op
BoolParameters nhwc ( " nhwc " ) ;
PredefinedParameters k ( " k " , { 2 , 3 } ) ;
ParametersBatch batch ( { & nhwc , & k } ) ;
int c = 3 ;
int hw = 64 ;
auto generator = PARAMETRIC_D ( ) {
auto ctx = new Context ( 1 ) ;
int n = p . getIntParam ( " nhwc " ) ;
int khw = p . getIntParam ( " k " ) ;
if ( n = = 0 ) {
auto input = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , c , hw , hw } ) ;
auto output = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , c , hw , hw } ) ;
ctx - > setInputArray ( 0 , input , true ) ;
ctx - > setOutputArray ( 0 , output , true ) ;
} else {
auto input = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , hw , hw , c } ) ;
auto output = NDArrayFactory : : create_ < T > ( ' c ' , { 8 , hw , hw , c } ) ;
ctx - > setInputArray ( 0 , input , true ) ;
ctx - > setOutputArray ( 0 , output , true ) ;
}
auto args = new Nd4jLong [ 11 ] ;
args [ 0 ] = args [ 1 ] = khw ; //Kernel
args [ 2 ] = args [ 3 ] = 1 ; //Stride
args [ 4 ] = args [ 5 ] = 0 ; //Pad
args [ 6 ] = args [ 7 ] = 1 ; //Dilation
args [ 8 ] = 1 ; //SAME
args [ 9 ] = 0 ; //Divisor mode - 0 = exclude padding in divisor
args [ 10 ] = n ; //0-nchw, 1=nhwc
ctx - > setIArguments ( args , 11 ) ;
delete [ ] args ;
return ctx ;
} ;
nd4j : : ops : : avgpool2d avgpool2d ;
DeclarableBenchmark benchmark1 ( avgpool2d , " avgpool " ) ;
output + = helper . runOperationSuit ( & benchmark1 , generator , batch , " Average Pool 2d " ) ;
nd4j : : ops : : maxpool2d maxpool2d ;
DeclarableBenchmark benchmark2 ( maxpool2d , " maxpool " ) ;
output + = helper . runOperationSuit ( & benchmark2 , generator , batch , " Max Pool 2d " ) ;
return output ;
}
template < typename T >
static std : : string lstmBenchmark ( ) {
std : : string output ;
output + = " lstm " + DataTypeUtils : : asString ( DataTypeUtils : : fromT < T > ( ) ) ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
BoolParameters format ( " format " ) ; //0=TNS=[seqLen,mb,size]; 1=NST=[mb,size,seqLen]
PredefinedParameters mb ( " mb " , { 1 , 8 } ) ;
int n = 128 ;
ParametersBatch batch ( { & format , & mb } ) ;
nd4j : : ops : : lstmBlock lstmBlock ;
DeclarableBenchmark benchmark ( lstmBlock , " lstm " ) ;
int seqLength = 8 ;
auto generator = PARAMETRIC_D ( ) {
auto ctx = new Context ( 1 ) ;
int f = p . getIntParam ( " format " ) ;
int m = p . getIntParam ( " mb " ) ;
Nd4jLong l = 0 ;
ctx - > setInputArray ( 0 , NDArrayFactory : : create_ < Nd4jLong > ( l ) , true ) ; //Max TS length (unused)
if ( f = = 0 ) {
//TNS format
ctx - > setInputArray ( 1 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //x
ctx - > setOutputArray ( 0 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //i
ctx - > setOutputArray ( 1 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //c
ctx - > setOutputArray ( 2 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //f
ctx - > setOutputArray ( 3 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //o
ctx - > setOutputArray ( 4 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //z
ctx - > setOutputArray ( 5 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //h
ctx - > setOutputArray ( 6 , NDArrayFactory : : create_ < T > ( ' c ' , { seqLength , m , n } ) , true ) ; //y
} else {
//NST format
ctx - > setInputArray ( 1 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //x
ctx - > setOutputArray ( 0 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //i
ctx - > setOutputArray ( 1 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //c
ctx - > setOutputArray ( 2 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //f
ctx - > setOutputArray ( 3 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //o
ctx - > setOutputArray ( 4 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //z
ctx - > setOutputArray ( 5 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //h
ctx - > setOutputArray ( 6 , NDArrayFactory : : create_ < T > ( ' f ' , { m , n , seqLength } ) , true ) ; //y
}
auto cLast = NDArrayFactory : : create_ < T > ( ' c ' , { m , n } ) ;
auto yLast = NDArrayFactory : : create_ < T > ( ' c ' , { m , n } ) ;
auto W = NDArrayFactory : : create_ < T > ( ' c ' , { 2 * n , 4 * n } ) ;
auto Wci = NDArrayFactory : : create_ < T > ( ' c ' , { n } ) ;
auto Wcf = NDArrayFactory : : create_ < T > ( ' c ' , { n } ) ;
auto Wco = NDArrayFactory : : create_ < T > ( ' c ' , { n } ) ;
auto b = NDArrayFactory : : create_ < T > ( ' c ' , { 4 * n } ) ;
ctx - > setInputArray ( 2 , cLast , true ) ;
ctx - > setInputArray ( 3 , yLast , true ) ;
ctx - > setInputArray ( 4 , W , true ) ;
ctx - > setInputArray ( 5 , Wci , true ) ;
ctx - > setInputArray ( 6 , Wcf , true ) ;
ctx - > setInputArray ( 7 , Wco , true ) ;
ctx - > setInputArray ( 8 , b , true ) ;
auto iargs = new Nd4jLong [ 2 ] ;
iargs [ 0 ] = 0 ; //No peephole
iargs [ 1 ] = f ;
ctx - > setIArguments ( iargs , 2 ) ;
delete [ ] iargs ;
auto targs = new double [ 2 ] ;
targs [ 0 ] = 1.0 ; //forget bias
targs [ 1 ] = 0.0 ; //cell clipping value
ctx - > setTArguments ( targs , 2 ) ;
delete [ ] targs ;
return ctx ;
} ;
output + = helper . runOperationSuit ( & benchmark , generator , batch , " LSTMBlock " ) ;
return output ;
}
static std : : string broadcast2d ( ) {
std : : string output ;
BenchmarkHelper helper ( WARMUP , NUM_ITER ) ;
int rows = 65536 ;
IntPowerParameters cols ( " cols " , 2 , 2 , 12 , 4 ) ; //2^2 to 2^12 in steps of 2 - 2^1=2, ..., 2^10=1024
BoolParameters axis ( " axis " ) ;
BoolParameters inplace ( " inplace " ) ;
ParametersBatch batch ( { & cols , & axis , & inplace } ) ;
auto generator = PARAMETRIC_D ( ) {
auto a = p . getIntParam ( " axis " ) ;
auto arr = NDArrayFactory : : create_ < float > ( ' c ' , { rows , p . getIntParam ( " cols " ) } ) ;
auto ctx = new Context ( 1 ) ;
ctx - > setInputArray ( 0 , arr , true ) ;
if ( a = = 0 ) {
ctx - > setInputArray ( 1 , NDArrayFactory : : create_ < float > ( ' c ' , { rows , 1 } ) , true ) ;
} else {
ctx - > setInputArray ( 1 , NDArrayFactory : : create_ < float > ( ' c ' , { 1 , p . getIntParam ( " cols " ) } ) , true ) ;
}
if ( p . getIntParam ( " inplace " ) = = 1 ) {
ctx - > setOutputArray ( 0 , arr ) ;
ctx - > markInplace ( true ) ;
} else {
ctx - > setOutputArray ( 0 , NDArrayFactory : : create_ < float > ( ' c ' , { rows , p . getIntParam ( " cols " ) } ) , true ) ;
}
return ctx ;
} ;
std : : string s ( " add " ) ;
nd4j : : ops : : add op ;
DeclarableBenchmark benchmark ( op , " add " ) ;
output + = helper . runOperationSuit ( & benchmark , generator , batch , " Broadcast (Custom) Add - 2d " ) ;
return output ;
}
std : : string LightBenchmarkSuit : : runSuit ( ) {
2019-11-13 15:15:18 +01:00
# ifdef RELEASE_BUILD
2019-07-12 07:21:15 +02:00
std : : vector < nd4j : : DataType > dtypes ( { nd4j : : DataType : : FLOAT32 , nd4j : : DataType : : HALF } ) ;
# else
std : : vector < nd4j : : DataType > dtypes ( { nd4j : : DataType : : FLOAT32 } ) ;
# endif
std : : string result ;
for ( auto t : dtypes ) {
nd4j_printf ( " Running LightBenchmarkSuite.transformBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = transformBenchmark , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.scalarBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = scalarBenchmark , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.pairwiseBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = pairwiseBenchmark , ( ) , LIBND4J_TYPES ) ;
2019-11-13 15:15:18 +01:00
/*
2019-07-12 07:21:15 +02:00
nd4j_printf ( " Running LightBenchmarkSuite.reduceFullBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = reduceFullBenchmark , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.reduceDimBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = reduceDimBenchmark , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.gemmBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = gemmBenchmark , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.conv2d [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = conv2d , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.pool2d [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = pool2d , ( ) , LIBND4J_TYPES ) ;
nd4j_printf ( " Running LightBenchmarkSuite.lstmBenchmark [%s] \n " , DataTypeUtils : : asString ( t ) . c_str ( ) ) ;
BUILD_SINGLE_SELECTOR ( t , result + = lstmBenchmark , ( ) , LIBND4J_TYPES ) ;
2019-11-13 15:15:18 +01:00
*/
2019-07-12 07:21:15 +02:00
}
nd4j_printf ( " Running LightBenchmarkSuite.broadcast2d \n " , " " ) ;
2019-11-13 15:15:18 +01:00
//result += broadcast2d();
2019-07-12 07:21:15 +02:00
nd4j_printf ( " Running LightBenchmarkSuite.mismatchedOrderAssign \n " , " " ) ;
2019-11-13 15:15:18 +01:00
//result += mismatchedOrderAssign();
2019-07-12 07:21:15 +02:00
return result ;
}
}