R119 tests (#238)

* one small test

Signed-off-by: raver119 <raver119@gmail.com>

* one small test

Signed-off-by: raver119 <raver119@gmail.com>

* bert test

Signed-off-by: raver119 <raver119@gmail.com>

* Graph FlowPath fix

Signed-off-by: raver119 <raver119@gmail.com>

* - GraphProfiler tweaks
- NodeProfile now includes shapes

Signed-off-by: raver119 <raver119@gmail.com>

* RELU_layer inplace tweak

Signed-off-by: raver119 <raver119@gmail.com>

* meh

Signed-off-by: raver119 <raver119@gmail.com>

* identity tweaks

Signed-off-by: raver119 <raver119@gmail.com>

* bert result validation

Signed-off-by: raver119 <raver119@gmail.com>

* - bunch of Shape ops have inplace exec forbidden now
- Legacy ops have inplace exec disabled by default now

Signed-off-by: raver119 <raver119@gmail.com>

* ffast-math enabled

Signed-off-by: raver119 <raver119@gmail.com>

* ffast-math enabled

Signed-off-by: raver119 <raver119@gmail.com>

* allow some legacy ops to be inplace

Signed-off-by: raver119 <raver119@gmail.com>

* disable -fast_math

Signed-off-by: raver119 <raver119@gmail.com>

* disable expensive test for cuda

Signed-off-by: raver119 <raver119@gmail.com>
master
raver119 2020-02-13 20:59:35 +03:00 committed by GitHub
parent fe47f52896
commit 3de3cd8277
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 306 additions and 62 deletions

View File

@ -179,7 +179,7 @@ namespace graph {
nd4j_debug("Embedded graph execution finished. %i variable(s) migrated\n", cnt);
} else if (node->hasCustomOp()) {
// if we have something to execute - lets just execute it.
// now, if we have something to execute - lets just execute it.
auto status = node->getCustomOp()->execute(&context);
if (status != ND4J_STATUS_OK)
return status;
@ -494,8 +494,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace)
nd4j::memory::MemoryRegistrator::getInstance()->setGraphMemoryFootprintIfGreater(h, m);
}
if (tempFlow)
if (tempFlow) {
delete flowPath;
__variableSpace->setFlowPath(nullptr);
}
return Status::OK();
}

View File

@ -58,6 +58,7 @@ namespace nd4j {
virtual void putVariable(int id, Variable *variable);
virtual void putVariable(int id, NDArray *array);
virtual void putVariable(int id, int idx, NDArray *array);
virtual void putVariable(int id, int idx, NDArray &array);
virtual void putVariable(int id, int idx, Variable *array);
virtual void replaceVariable(Variable *variable);

View File

@ -100,6 +100,7 @@ namespace nd4j {
virtual void putVariable(int id, Variable *variable);
virtual void putVariable(int id, NDArray *array);
virtual void putVariable(int id, int idx, NDArray *array);
virtual void putVariable(int id, int idx, NDArray &array);
virtual void putVariable(int id, int idx, Variable *array);
virtual void dropVariable(std::pair<int,int> &pair);

View File

@ -60,8 +60,11 @@ namespace nd4j {
result->_name = this->_name;
result->_index = this->_index;
if (this->_ndarray != nullptr)
if (this->_ndarray != nullptr) {
result->_ndarray = new NDArray(this->_ndarray->dup(this->_ndarray->ordering()));
result->_readOnly = false;
result->_removable = true;
}
if (this->_list != nullptr)
result->_list = this->_list->clone();

View File

@ -191,6 +191,9 @@ namespace nd4j {
_current->putVariable(id, array);
}
void nd4j::graph::VariableProxy::putVariable(int id, int idx, NDArray &array) {
_current->putVariable(id, idx, array);
}
void VariableProxy::putVariable(int id, int idx, NDArray *array) {
_current->putVariable(id, idx, array);

View File

@ -263,19 +263,19 @@ namespace nd4j {
void nd4j::graph::VariableSpace::putVariable(int id, Variable *variable) {
// we don't want to add variables more than once
if (_variables.count(id) > 0 || _temporary.count(id) > 0) {
// nd4j_verbose("Trying to update variable for node_%i\n", id);
auto local = id < 0 ? _variables.at(id) : _temporary.at(id);
if (!local->hasNDArray() && variable->hasNDArray()) {
// nd4j_verbose("Saving variable for node_%i\n", id);
local->setNDArray(variable->getNDArray());
// we're inheriting this from Variable
local->markReadOnly(variable->isReadOnly());
local->markRemovable(variable->isRemovable());
}
return;
}
//nd4j_debug("Adding Variable to Space: id: %i; Array is null: %i;\n", id, variable->getNDArray() == nullptr);
_varmap.lock();
_handles->emplace_back(variable);
@ -314,6 +314,21 @@ namespace nd4j {
}
}
void nd4j::graph::VariableSpace::putVariable(int id, int idx, NDArray &array) {
auto *var = new nd4j::graph::Variable(&array, "", id, idx);
var->markRemovable(false);
var->markReadOnly(true);
// let's see if this op needs
bool d = this->hasVariable(id, idx);
this->putVariable(id, var);
// if var for this nodeid already exists - we'll just delete variable
if (d)
delete var;
}
void nd4j::graph::VariableSpace::putVariable(int id, NDArray *array) {
auto *var = new nd4j::graph::Variable(array);
this->putVariable(id, var);

View File

@ -24,6 +24,7 @@
#include <pointercast.h>
#include <dll.h>
#include <string>
#include <vector>
namespace nd4j {
namespace graph {
@ -65,6 +66,9 @@ namespace nd4j {
// total amount of memory used during execution
Nd4jLong _memoryTotal = 0L;
std::vector<std::string> _inputShapes;
std::vector<std::string> _outputShapes;
public:
NodeProfile() = default;
~NodeProfile() = default;
@ -84,10 +88,15 @@ namespace nd4j {
void setObjectsSize(Nd4jLong bytes);
void setTotalSize(Nd4jLong bytes);
Nd4jLong getActivationsSize();
Nd4jLong getTemporarySize();
Nd4jLong getObjectsSize();
Nd4jLong getTotalSize();
void addInputShape(Nd4jLong *shapeInfo);
void addOutputShape(Nd4jLong *shapeInfo);
Nd4jLong getActivationsSize() const;
Nd4jLong getTemporarySize() const;
Nd4jLong getObjectsSize() const;
Nd4jLong getTotalSize() const;
Nd4jLong getExecutionTime() const;
std::string& name();

View File

@ -21,6 +21,8 @@
#include <graph/profiling/GraphProfile.h>
#include <helpers/logger.h>
#include <chrono>
#include <templatemath.h>
#include <algorithm>
namespace nd4j {
namespace graph {
@ -184,8 +186,25 @@ namespace nd4j {
if (_profiles.empty())
nd4j_printf("No nodes in graph\n","");
for (auto v: _profiles)
// printing out stuff
std::vector<NodeProfile*> sorted;
for (auto v: _profiles) {
v->printOut();
sorted.emplace_back(v);
}
if (_profiles.size() > 1) {
// building hot spots
std::sort(sorted.begin(), sorted.end(), [](const NodeProfile *a, const NodeProfile *b) -> bool {
return a->getExecutionTime() > b->getExecutionTime();
});
nd4j_printf("\nTop 30 reports by EXEC:\n", "");
auto limit = nd4j::math::nd4j_min<int>(30, sorted.size());
for (int e = 0; e < limit; e++) {
sorted[e]->printOut();
}
}
nd4j_printf("\nSpecial timers:\n", "");
if (_timings.empty())

View File

@ -32,7 +32,7 @@ namespace nd4j {
// graph->printOut();
// warm up
for (int e = 0; e < 1000; e++) {
for (int e = 0; e < iterations; e++) {
FlowPath fp;
auto _vs = varSpace->clone();

View File

@ -20,6 +20,7 @@
#include <helpers/logger.h>
#include <graph/profiling/NodeProfile.h>
#include <helpers/ShapeUtils.h>
namespace nd4j {
namespace graph {
@ -35,9 +36,23 @@ namespace nd4j {
nd4j_printf(" Memory: ACT: %lld; TMP: %lld; OBJ: %lld; TTL: %lld;\n", _memoryActivations / _merges, _memoryTemporary / _merges, _memoryObjects / _merges, _memoryTotal / _merges);
nd4j_printf(" Time: PREP: %lld ns; EXEC: %lld ns; TTL: %lld ns;\n", _preparationTime / _merges, _executionTime / _merges, _totalTime / _merges);
nd4j_printf(" PREP: INPUT: %lld ns; SHAPE: %lld ns; ARRAY: %lld ns;\n", _inputTime / _merges, _shapeTime / _merges, _arrayTime / _merges);
std::string inputs;
std::string outputs;
int cnt = 0;
for (const auto &v: _inputShapes)
inputs += v + " ";
for (const auto &v: _outputShapes)
outputs += v + " ";
nd4j_printf(" Inputs: %s\n", inputs.c_str());
nd4j_printf(" Outputs: %s\n", outputs.c_str());
};
Nd4jLong NodeProfile::getActivationsSize() {
Nd4jLong NodeProfile::getActivationsSize() const {
return _memoryActivations;
}
@ -53,15 +68,15 @@ namespace nd4j {
_inputTime = time;
}
Nd4jLong NodeProfile::getTemporarySize() {
Nd4jLong NodeProfile::getTemporarySize() const{
return _memoryTemporary;
}
Nd4jLong NodeProfile::getObjectsSize() {
Nd4jLong NodeProfile::getObjectsSize() const{
return _memoryObjects;
}
Nd4jLong NodeProfile::getTotalSize() {
Nd4jLong NodeProfile::getTotalSize() const{
return _memoryTotal;
}
@ -97,6 +112,18 @@ namespace nd4j {
_memoryTotal = bytes;
}
Nd4jLong NodeProfile::getExecutionTime() const {
return _executionTime;
}
void NodeProfile::addInputShape(Nd4jLong *shapeInfo) {
_inputShapes.emplace_back(ShapeUtils::shapeAsString(shapeInfo));
}
void NodeProfile::addOutputShape(Nd4jLong *shapeInfo) {
_outputShapes.emplace_back(ShapeUtils::shapeAsString(shapeInfo));
}
void NodeProfile::merge(NodeProfile *other) {
_merges += other->_merges;
_memoryObjects += other->_memoryObjects;
@ -110,6 +137,9 @@ namespace nd4j {
_shapeTime += other->_shapeTime;
_arrayTime += other->_arrayTime;
_inputTime += other->_inputTime;
_inputShapes = other->_inputShapes;
_outputShapes = other->_outputShapes;
}
std::string& NodeProfile::name() {
@ -129,6 +159,9 @@ namespace nd4j {
_shapeTime = other->_shapeTime;
_arrayTime = other->_arrayTime;
_inputTime = other->_inputTime;
_inputShapes = other->_inputShapes;
_outputShapes = other->_outputShapes;
}
}
}

View File

@ -147,6 +147,9 @@ namespace nd4j {
// returns TRUE if this op allows in-place execution
bool allowsInplace();
// this method allows you to enable/disable inplace call for a given op
void allowInplace(bool reallyAllow);
// this method returns opNum (applicable for legacy XYZ ops only)
int getOpNum();

View File

@ -27,12 +27,10 @@ namespace nd4j {
namespace ops {
OP_IMPL(identity, 1, 1, true) {
auto first = INPUT_VARIABLE(0);
auto z = this->getZ(block);
auto z = OUTPUT_VARIABLE(0);
// just for lulz
first->applyTransform(nd4j::transform::Identity, *z);
STORE_RESULT(*z);
if (!block.isInplace())
first->applyTransform(nd4j::transform::Identity, *z);
return Status::OK();
}
@ -60,8 +58,8 @@ namespace nd4j {
DECLARE_TYPES(identity_bp) {
getOpDescriptor()
->setAllowedInputTypes(0, DataType::ANY)
->setAllowedInputTypes(1, {DataType::FLOAT32, DataType ::DOUBLE, DataType::HALF})
->setAllowedOutputTypes(0, {DataType::FLOAT32, DataType ::DOUBLE, DataType::HALF});
->setAllowedInputTypes(1, {ALL_FLOATS})
->setAllowedOutputTypes(0, {ALL_FLOATS});
}
}
}

View File

@ -31,22 +31,17 @@ namespace nd4j {
REQUIRE_TRUE(w->isMatrix(), 0, "relu_layer: weights argument should be a 2D tensor, but got rank %i instead!", w->rankOf());
REQUIRE_TRUE(b->isVector(), 0, "relu_layer: biases argument should be a 1D tensor, but got rank %i instead!", b->rankOf());
REQUIRE_TRUE(b->lengthOf() == w->sizeAt(1), 0, "relu_layer: biases array length should match to columns of weights matrix, however got length = %i and columns = %i!", b->lengthOf(), w->sizeAt(1));
REQUIRE_TRUE(x->sizeAt(1) == w->sizeAt(0), 0, "relu_layer: number of x columns should match to row number of weights matrix, but got x_columns = %i and weights_rows = %i!",
x->sizeAt(1), w->sizeAt(0));
REQUIRE_TRUE(x->sizeAt(1) == w->sizeAt(0), 0, "relu_layer: number of x columns should match to row number of weights matrix, but got x_columns = %i and weights_rows = %i!", x->sizeAt(1), w->sizeAt(0));
auto output = OUTPUT_VARIABLE(0);
//T bound = (T)0.f;
//nd4j_printf("Matrix x(%ix%i), Matrix w(%ix%i), b(1x%i)\n", x->sizeAt(0), x->sizeAt(1), w->sizeAt(0), w->sizeAt(1), b->lengthOf());
nd4j::ops::xw_plus_b op;
std::unique_ptr<ResultSet> result(op.evaluate({x, w, b}));
REQUIRE_TRUE(Status::OK() == result->status(), 0, "relu_layer: xw_plus_b op failed on input data.");
auto status = op.execute({x, w, b}, {output});
REQUIRE_TRUE(Status::OK() == status, 0, "relu_layer: xw_plus_b op failed on input data.");
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;
auto xw = result->at(0);
xw->applyScalar(nd4j::scalar::RELU, scalar, *output);
output->applyScalar(nd4j::scalar::RELU, scalar, *output);
return Status::OK();
}

View File

@ -28,7 +28,7 @@ namespace nd4j {
//////////////////////////////////////////////////////////////////////////
// here iArgs is a vector with (optional) negative of order as first element:
// ({-order, dim1, dim2, dim3, ...})
CUSTOM_OP_IMPL(reshape, 1, 1, true, 0, -2) {
CUSTOM_OP_IMPL(reshape, 1, 1, false, 0, -2) {
auto x = INPUT_VARIABLE(0);
if (block.width() == 1) {

View File

@ -28,7 +28,7 @@ namespace nd4j {
//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(reshapeas, 2, 1, true, 0, 0) {
CUSTOM_OP_IMPL(reshapeas, 2, 1, false, 0, 0) {
auto x = INPUT_VARIABLE(0);
auto y = INPUT_VARIABLE(1);

View File

@ -25,7 +25,7 @@
namespace nd4j {
namespace ops {
CUSTOM_OP_IMPL(tile_to_shape, 1, 1, true, 0, -1) {
CUSTOM_OP_IMPL(tile_to_shape, 1, 1, false, 0, -1) {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);

View File

@ -28,7 +28,7 @@ namespace nd4j {
namespace ops {
//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(transpose, 1, 1, true, 0, 0) {
CUSTOM_OP_IMPL(transpose, 1, 1, false, 0, 0) {
auto x = INPUT_VARIABLE(0);
if (block.width() == 1) {
if (block.isInplace()) {

View File

@ -26,15 +26,15 @@
namespace nd4j {
namespace ops {
#if NOT_EXCLUDED(OP_permute)
DECLARE_CUSTOM_OP(permute, 1, 1, true, 0, -2);
DECLARE_CUSTOM_OP(permute, 1, 1, false, 0, -2);
#endif
#if NOT_EXCLUDED(OP_reshapeas)
DECLARE_CUSTOM_OP(reshapeas, 2, 1, true, 0, 0);
DECLARE_CUSTOM_OP(reshapeas, 2, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_transpose)
DECLARE_CUSTOM_OP(transpose, 1, 1, true, 0, 0);
DECLARE_CUSTOM_OP(transpose, 1, 1, false, 0, 0);
#endif
#if NOT_EXCLUDED(OP_shape_of)
@ -46,7 +46,7 @@ namespace nd4j {
#endif
#if NOT_EXCLUDED(OP_squeeze)
DECLARE_CUSTOM_OP(squeeze, 1, 1, true, 0, -2);
DECLARE_CUSTOM_OP(squeeze, 1, 1, false, 0, -2);
#endif
#if NOT_EXCLUDED(OP_expand_dims)
@ -54,11 +54,11 @@ namespace nd4j {
#endif
#if NOT_EXCLUDED(OP_reshape)
DECLARE_CUSTOM_OP(reshape, 1, 1, true, 0, -2);
DECLARE_CUSTOM_OP(reshape, 1, 1, false, 0, -2);
#endif
#if NOT_EXCLUDED(OP_size_at)
DECLARE_CUSTOM_OP(size_at, 1, 1, true, 0, 1);
DECLARE_CUSTOM_OP(size_at, 1, 1, false, 0, 1);
#endif
/**
@ -80,8 +80,8 @@ namespace nd4j {
* @tparam T
*/
#if NOT_EXCLUDED(OP_tile_to_shape)
DECLARE_CUSTOM_OP(tile_to_shape, 1, 1, true, 0, -1);
DECLARE_CUSTOM_OP(tile_to_shape_bp, 2, 1, true, 0, -1);
DECLARE_CUSTOM_OP(tile_to_shape, 1, 1, false, 0, -1);
DECLARE_CUSTOM_OP(tile_to_shape_bp, 2, 1, false, 0, -1);
#endif
/**

View File

@ -150,6 +150,22 @@ namespace nd4j {
}
if (ctx.isInplace()) {
if (Environment::getInstance()->isProfiling() && node != nullptr) {
if (ctx.isFastPath()) {
//
} else {
for (auto p: *ctx.inputs()) {
auto var = ctx.variable(p);
if (var->variableType() == VariableType::NDARRAY) {
NDArray *array = var->getNDArray();
node->addInputShape(array->shapeInfo());
node->addOutputShape(array->shapeInfo());
}
}
}
}
// do nothing, getZ result will do the trick
return static_cast<int>(ctx.width());
} else {
@ -192,6 +208,10 @@ namespace nd4j {
auto inputTime = std::chrono::duration_cast<std::chrono::nanoseconds>(inputEnd - inputStart).count();
node->setInputTime(inputTime);
// saving input shapes in profile
for (int e = 0; e < inSha.size(); e++)
node->addInputShape(inSha.at(e));
shapeStart = std::chrono::system_clock::now();
}
@ -204,6 +224,10 @@ namespace nd4j {
auto prepTime = std::chrono::duration_cast<std::chrono::nanoseconds>(shapeEnd - shapeStart).count();
node->setShapeFunctionTime(prepTime);
// saving output shapes in profile
for (int e = 0; e < outSha->size(); e++)
node->addOutputShape(outSha->at(e));
arrayStart = std::chrono::system_clock::now();
}
@ -562,7 +586,7 @@ namespace nd4j {
block->setInnerTime(outerTime);
}
if (Environment::getInstance()->isProfiling()) {
if (Environment::getInstance()->isProfiling() && !block->isFastPath()) {
auto fp = block->getVariableSpace()->flowPath();
if (fp != nullptr) {
auto p = fp->profile();

View File

@ -23,11 +23,11 @@
namespace nd4j {
namespace ops {
LegacyOp::LegacyOp(int numInputs) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", true) {
LegacyOp::LegacyOp(int numInputs) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", false) {
_numInputs = numInputs;
}
LegacyOp::LegacyOp(int numInputs, int opNum) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", true) {
LegacyOp::LegacyOp(int numInputs, int opNum) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", false) {
_opNum = opNum;
_numInputs = numInputs;
}

View File

@ -25,11 +25,11 @@
namespace nd4j {
namespace ops {
LegacyPairwiseTransformOp::LegacyPairwiseTransformOp() : LegacyOp::LegacyOp(2) {
// just a no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyPairwiseTransformOp::LegacyPairwiseTransformOp(int opNum) : LegacyOp::LegacyOp(2, opNum) {
// just a no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyOp* LegacyPairwiseTransformOp::clone() {

View File

@ -26,11 +26,11 @@
namespace nd4j {
namespace ops {
LegacyScalarOp::LegacyScalarOp() : LegacyOp::LegacyOp(1) {
// no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyScalarOp::LegacyScalarOp(int opNum) : LegacyOp::LegacyOp(1, opNum){
// no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyOp* LegacyScalarOp::clone() {
@ -66,6 +66,7 @@ namespace nd4j {
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType()));
NDArray::registerSpecialUse({z}, {x, y});
} else if (block.getTArguments()->size() > 0) {
auto y = NDArrayFactory::create(x->dataType(), T_ARG(0), block.launchContext());
@ -78,10 +79,9 @@ namespace nd4j {
NDArray::prepareSpecialUse({z}, {x, _scalar});
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType()));
}
manager.synchronize();
STORE_RESULT(*z);
NDArray::registerSpecialUse({z}, {x, _scalar});
}
return Status::OK();
}

View File

@ -26,11 +26,11 @@
namespace nd4j {
namespace ops {
LegacyTransformSameOp::LegacyTransformSameOp() : LegacyOp::LegacyOp(1) {
// just a no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyTransformSameOp::LegacyTransformSameOp(int opNum) : LegacyOp::LegacyOp(1, opNum) {
// just a no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyOp* LegacyTransformSameOp::clone() {

View File

@ -26,11 +26,11 @@
namespace nd4j {
namespace ops {
LegacyTransformStrictOp::LegacyTransformStrictOp() : LegacyOp::LegacyOp(1) {
// just a no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyTransformStrictOp::LegacyTransformStrictOp(int opNum) : LegacyOp::LegacyOp(1, opNum) {
// just a no-op
this->getOpDescriptor()->allowInplace(true);
}
LegacyOp* LegacyTransformStrictOp::clone() {

View File

@ -50,6 +50,9 @@ namespace nd4j {
_scalar = isScalar;
}
void OpDescriptor::allowInplace(bool reallyAllow){
_allowsInplace = reallyAllow;
}
bool OpDescriptor::operator==(const OpDescriptor& other) const {
if (_hash == -1 && other._hash == -1)

View File

@ -52,7 +52,7 @@ elseif(WIN32)
set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2")
endif()
else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2")
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native")

View File

@ -3087,6 +3087,10 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_03_3) {
////////////////////////////////////////////////////////////////////
TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
NDArray x = NDArrayFactory::create<float>('c', {2,4,5,3});
NDArray exp = NDArrayFactory::create<float>('c', {2,4,5,3},{

View File

@ -78,6 +78,11 @@ TEST_F(DeclarableOpsTests14, Test_Inf_Comparison_1) {
}
TEST_F(DeclarableOpsTests14, Test_Inf_Comparison_2) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
auto x = NDArrayFactory::create<double>('c', {5}, {1, 2, 3, std::numeric_limits<double>::infinity(), 5});
auto y = NDArrayFactory::create<double>('c', {5}, {1, 2, 3, -std::numeric_limits<double>::infinity(), 5});
@ -332,6 +337,10 @@ TEST_F(DeclarableOpsTests14, test_empty_reduce_max_1) {
}
TEST_F(DeclarableOpsTests14, test_empty_reduce_sum_1) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
auto e = NDArrayFactory::create<float>('c', {1, 0});
nd4j::ops::reduce_sum sumOp;
@ -343,6 +352,10 @@ TEST_F(DeclarableOpsTests14, test_empty_reduce_sum_1) {
}
TEST_F(DeclarableOpsTests14, test_empty_reduce_mean_1) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
auto e = NDArrayFactory::create<float>('c', {1, 0});
nd4j::ops::reduce_mean sumOp;

View File

@ -584,6 +584,11 @@ TEST_F(DeclarableOpsTests15, test_check_numeric_1) {
}
TEST_F(DeclarableOpsTests15, test_check_numeric_2) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
auto x = NDArrayFactory::create<float>('c', {3},{1.f, 2.f, std::numeric_limits<float>::infinity()});
auto y = NDArrayFactory::string("should trigger");
auto z = NDArrayFactory::create<float>('c', {3} );
@ -598,6 +603,11 @@ TEST_F(DeclarableOpsTests15, test_check_numeric_2) {
}
TEST_F(DeclarableOpsTests15, test_check_numeric_3) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
auto x = NDArrayFactory::create<float>('c', {3},{1.f, 2.f, std::numeric_limits<float>::quiet_NaN()});
auto y = NDArrayFactory::string("should trigger");
auto z = NDArrayFactory::create<float>('c', {3} );
@ -1530,6 +1540,10 @@ TEST_F(DeclarableOpsTests15, Pow_BP_Test10) {
}
TEST_F(DeclarableOpsTests15, Pow_BP_Test11) {
#ifdef FFAST_MATH
if (1 > 0)
return;
#endif
NDArray xB('c', { 3,2,1 }, { .4, 3, 5, .8, -9, -12 }, nd4j::DataType::FLOAT32);
NDArray yB('c', { 1,2,3 }, { 3, -2, .4, -4, 10, .8 }, nd4j::DataType::FLOAT32);

View File

@ -65,6 +65,110 @@ TEST_F(PlaygroundTests, test_avx) {
nd4j_printf("Optimal level: %i; Binary level: %i;\n", ::optimalLevel(), ::binaryLevel());
}
TEST_F(PlaygroundTests, test_bert_1) {
// this test will run ONLY if this model exists
if (nd4j::graph::getFileSize("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model.fb") < 0)
return;
auto graph = GraphExecutioner::importFromFlatBuffers("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model.fb");
auto t = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext.numpy");
auto u = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext_1.numpy");
auto v = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext_4.numpy");
auto z = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model_output.numpy");
//graph->printOut();
graph->tagInplaceNodes();
graph->getVariableSpace()->putVariable(85,0, t);
graph->getVariableSpace()->putVariable(86,0, u);
graph->getVariableSpace()->putVariable(87,0, v);
/*
// validating graph now
auto status = GraphExecutioner::execute(graph);
ASSERT_EQ(Status::OK(), status);
ASSERT_TRUE(graph->getVariableSpace()->hasVariable(198));
auto array = graph->getVariableSpace()->getVariable(198)->getNDArray();
ASSERT_EQ(z, *array);
*/
nd4j::Environment::getInstance()->setProfiling(true);
auto profile = GraphProfilingHelper::profile(graph, 1);
profile->printOut();
nd4j::Environment::getInstance()->setProfiling(false);
delete profile;
/*
std::vector<Nd4jLong> values;
for (int e = 0; e < 1; e++) {
auto timeStart = std::chrono::system_clock::now();
GraphExecutioner::execute(graph);
auto timeEnd = std::chrono::system_clock::now();
auto outerTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeStart).count();
values.emplace_back(outerTime);
}
std::sort(values.begin(), values.end());
nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
*/
delete graph;
}
/*
TEST_F(PlaygroundTests, test_broadcast_1) {
int pool = 10;
std::vector<NDArray*> aX(pool);
std::vector<NDArray*> aY(pool);
std::vector<NDArray*> aZ(pool);
for (int e = 0; e < pool; e++) {
aX[e] = NDArrayFactory::create_<float>('c', {64, 128, 1});
aY[e] = NDArrayFactory::create_<float>('c', {768});
aZ[e] = NDArrayFactory::create_<float>('c', {64, 128, 768});
aX[e]->assign(119 * (e+1));
aY[e]->assign(119 * (e+3));
}
std::vector<Nd4jLong> values;
for (int e = 0; e < 1000; e++) {
auto x = aX[e < pool ? e : e % pool];
auto y = aY[e < pool ? e : e % pool];
auto z = aZ[e < pool ? e : e % pool];
auto timeStart = std::chrono::system_clock::now();
x->applyTrueBroadcast(BroadcastOpsTuple::Multiply(), *y, *z);
auto timeEnd = std::chrono::system_clock::now();
auto outerTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeStart).count();
values.emplace_back(outerTime);
}
std::sort(values.begin(), values.end());
nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
for (int e = 0; e < pool; e++) {
delete aX[e];
delete aY[e];
delete aZ[e];
}
}
*/
/*
TEST_F(PlaygroundTests, test_s_0) {