R119 tests (#238)
* one small test Signed-off-by: raver119 <raver119@gmail.com> * one small test Signed-off-by: raver119 <raver119@gmail.com> * bert test Signed-off-by: raver119 <raver119@gmail.com> * Graph FlowPath fix Signed-off-by: raver119 <raver119@gmail.com> * - GraphProfiler tweaks - NodeProfile now includes shapes Signed-off-by: raver119 <raver119@gmail.com> * RELU_layer inplace tweak Signed-off-by: raver119 <raver119@gmail.com> * meh Signed-off-by: raver119 <raver119@gmail.com> * identity tweaks Signed-off-by: raver119 <raver119@gmail.com> * bert result validation Signed-off-by: raver119 <raver119@gmail.com> * - bunch of Shape ops have inplace exec forbidden now - Legacy ops have inplace exec disabled by default now Signed-off-by: raver119 <raver119@gmail.com> * ffast-math enabled Signed-off-by: raver119 <raver119@gmail.com> * ffast-math enabled Signed-off-by: raver119 <raver119@gmail.com> * allow some legacy ops to be inplace Signed-off-by: raver119 <raver119@gmail.com> * disable -fast_math Signed-off-by: raver119 <raver119@gmail.com> * disable expensive test for cuda Signed-off-by: raver119 <raver119@gmail.com>master
parent
fe47f52896
commit
3de3cd8277
|
@ -179,7 +179,7 @@ namespace graph {
|
||||||
nd4j_debug("Embedded graph execution finished. %i variable(s) migrated\n", cnt);
|
nd4j_debug("Embedded graph execution finished. %i variable(s) migrated\n", cnt);
|
||||||
|
|
||||||
} else if (node->hasCustomOp()) {
|
} else if (node->hasCustomOp()) {
|
||||||
// if we have something to execute - lets just execute it.
|
// now, if we have something to execute - lets just execute it.
|
||||||
auto status = node->getCustomOp()->execute(&context);
|
auto status = node->getCustomOp()->execute(&context);
|
||||||
if (status != ND4J_STATUS_OK)
|
if (status != ND4J_STATUS_OK)
|
||||||
return status;
|
return status;
|
||||||
|
@ -494,8 +494,10 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace)
|
||||||
nd4j::memory::MemoryRegistrator::getInstance()->setGraphMemoryFootprintIfGreater(h, m);
|
nd4j::memory::MemoryRegistrator::getInstance()->setGraphMemoryFootprintIfGreater(h, m);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tempFlow)
|
if (tempFlow) {
|
||||||
delete flowPath;
|
delete flowPath;
|
||||||
|
__variableSpace->setFlowPath(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,7 @@ namespace nd4j {
|
||||||
virtual void putVariable(int id, Variable *variable);
|
virtual void putVariable(int id, Variable *variable);
|
||||||
virtual void putVariable(int id, NDArray *array);
|
virtual void putVariable(int id, NDArray *array);
|
||||||
virtual void putVariable(int id, int idx, NDArray *array);
|
virtual void putVariable(int id, int idx, NDArray *array);
|
||||||
|
virtual void putVariable(int id, int idx, NDArray &array);
|
||||||
virtual void putVariable(int id, int idx, Variable *array);
|
virtual void putVariable(int id, int idx, Variable *array);
|
||||||
|
|
||||||
virtual void replaceVariable(Variable *variable);
|
virtual void replaceVariable(Variable *variable);
|
||||||
|
|
|
@ -100,6 +100,7 @@ namespace nd4j {
|
||||||
virtual void putVariable(int id, Variable *variable);
|
virtual void putVariable(int id, Variable *variable);
|
||||||
virtual void putVariable(int id, NDArray *array);
|
virtual void putVariable(int id, NDArray *array);
|
||||||
virtual void putVariable(int id, int idx, NDArray *array);
|
virtual void putVariable(int id, int idx, NDArray *array);
|
||||||
|
virtual void putVariable(int id, int idx, NDArray &array);
|
||||||
virtual void putVariable(int id, int idx, Variable *array);
|
virtual void putVariable(int id, int idx, Variable *array);
|
||||||
|
|
||||||
virtual void dropVariable(std::pair<int,int> &pair);
|
virtual void dropVariable(std::pair<int,int> &pair);
|
||||||
|
|
|
@ -60,8 +60,11 @@ namespace nd4j {
|
||||||
result->_name = this->_name;
|
result->_name = this->_name;
|
||||||
result->_index = this->_index;
|
result->_index = this->_index;
|
||||||
|
|
||||||
if (this->_ndarray != nullptr)
|
if (this->_ndarray != nullptr) {
|
||||||
result->_ndarray = new NDArray(this->_ndarray->dup(this->_ndarray->ordering()));
|
result->_ndarray = new NDArray(this->_ndarray->dup(this->_ndarray->ordering()));
|
||||||
|
result->_readOnly = false;
|
||||||
|
result->_removable = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (this->_list != nullptr)
|
if (this->_list != nullptr)
|
||||||
result->_list = this->_list->clone();
|
result->_list = this->_list->clone();
|
||||||
|
|
|
@ -191,6 +191,9 @@ namespace nd4j {
|
||||||
_current->putVariable(id, array);
|
_current->putVariable(id, array);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nd4j::graph::VariableProxy::putVariable(int id, int idx, NDArray &array) {
|
||||||
|
_current->putVariable(id, idx, array);
|
||||||
|
}
|
||||||
|
|
||||||
void VariableProxy::putVariable(int id, int idx, NDArray *array) {
|
void VariableProxy::putVariable(int id, int idx, NDArray *array) {
|
||||||
_current->putVariable(id, idx, array);
|
_current->putVariable(id, idx, array);
|
||||||
|
|
|
@ -263,19 +263,19 @@ namespace nd4j {
|
||||||
void nd4j::graph::VariableSpace::putVariable(int id, Variable *variable) {
|
void nd4j::graph::VariableSpace::putVariable(int id, Variable *variable) {
|
||||||
// we don't want to add variables more than once
|
// we don't want to add variables more than once
|
||||||
if (_variables.count(id) > 0 || _temporary.count(id) > 0) {
|
if (_variables.count(id) > 0 || _temporary.count(id) > 0) {
|
||||||
// nd4j_verbose("Trying to update variable for node_%i\n", id);
|
|
||||||
|
|
||||||
auto local = id < 0 ? _variables.at(id) : _temporary.at(id);
|
auto local = id < 0 ? _variables.at(id) : _temporary.at(id);
|
||||||
|
|
||||||
if (!local->hasNDArray() && variable->hasNDArray()) {
|
if (!local->hasNDArray() && variable->hasNDArray()) {
|
||||||
// nd4j_verbose("Saving variable for node_%i\n", id);
|
|
||||||
local->setNDArray(variable->getNDArray());
|
local->setNDArray(variable->getNDArray());
|
||||||
|
|
||||||
|
// we're inheriting this from Variable
|
||||||
|
local->markReadOnly(variable->isReadOnly());
|
||||||
|
local->markRemovable(variable->isRemovable());
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//nd4j_debug("Adding Variable to Space: id: %i; Array is null: %i;\n", id, variable->getNDArray() == nullptr);
|
|
||||||
|
|
||||||
_varmap.lock();
|
_varmap.lock();
|
||||||
|
|
||||||
_handles->emplace_back(variable);
|
_handles->emplace_back(variable);
|
||||||
|
@ -314,6 +314,21 @@ namespace nd4j {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nd4j::graph::VariableSpace::putVariable(int id, int idx, NDArray &array) {
|
||||||
|
auto *var = new nd4j::graph::Variable(&array, "", id, idx);
|
||||||
|
var->markRemovable(false);
|
||||||
|
var->markReadOnly(true);
|
||||||
|
|
||||||
|
// let's see if a variable for this id:idx pair already exists
|
||||||
|
bool d = this->hasVariable(id, idx);
|
||||||
|
|
||||||
|
this->putVariable(id, var);
|
||||||
|
|
||||||
|
// if var for this nodeid already exists - we'll just delete variable
|
||||||
|
if (d)
|
||||||
|
delete var;
|
||||||
|
}
|
||||||
|
|
||||||
void nd4j::graph::VariableSpace::putVariable(int id, NDArray *array) {
|
void nd4j::graph::VariableSpace::putVariable(int id, NDArray *array) {
|
||||||
auto *var = new nd4j::graph::Variable(array);
|
auto *var = new nd4j::graph::Variable(array);
|
||||||
this->putVariable(id, var);
|
this->putVariable(id, var);
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include <pointercast.h>
|
#include <pointercast.h>
|
||||||
#include <dll.h>
|
#include <dll.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace graph {
|
namespace graph {
|
||||||
|
@ -65,6 +66,9 @@ namespace nd4j {
|
||||||
|
|
||||||
// total amount of memory used during execution
|
// total amount of memory used during execution
|
||||||
Nd4jLong _memoryTotal = 0L;
|
Nd4jLong _memoryTotal = 0L;
|
||||||
|
|
||||||
|
std::vector<std::string> _inputShapes;
|
||||||
|
std::vector<std::string> _outputShapes;
|
||||||
public:
|
public:
|
||||||
NodeProfile() = default;
|
NodeProfile() = default;
|
||||||
~NodeProfile() = default;
|
~NodeProfile() = default;
|
||||||
|
@ -84,10 +88,15 @@ namespace nd4j {
|
||||||
void setObjectsSize(Nd4jLong bytes);
|
void setObjectsSize(Nd4jLong bytes);
|
||||||
void setTotalSize(Nd4jLong bytes);
|
void setTotalSize(Nd4jLong bytes);
|
||||||
|
|
||||||
Nd4jLong getActivationsSize();
|
void addInputShape(Nd4jLong *shapeInfo);
|
||||||
Nd4jLong getTemporarySize();
|
void addOutputShape(Nd4jLong *shapeInfo);
|
||||||
Nd4jLong getObjectsSize();
|
|
||||||
Nd4jLong getTotalSize();
|
Nd4jLong getActivationsSize() const;
|
||||||
|
Nd4jLong getTemporarySize() const;
|
||||||
|
Nd4jLong getObjectsSize() const;
|
||||||
|
Nd4jLong getTotalSize() const;
|
||||||
|
|
||||||
|
Nd4jLong getExecutionTime() const;
|
||||||
|
|
||||||
std::string& name();
|
std::string& name();
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,8 @@
|
||||||
#include <graph/profiling/GraphProfile.h>
|
#include <graph/profiling/GraphProfile.h>
|
||||||
#include <helpers/logger.h>
|
#include <helpers/logger.h>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <templatemath.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace graph {
|
namespace graph {
|
||||||
|
@ -184,8 +186,25 @@ namespace nd4j {
|
||||||
if (_profiles.empty())
|
if (_profiles.empty())
|
||||||
nd4j_printf("No nodes in graph\n","");
|
nd4j_printf("No nodes in graph\n","");
|
||||||
|
|
||||||
for (auto v: _profiles)
|
// printing out stuff
|
||||||
|
std::vector<NodeProfile*> sorted;
|
||||||
|
for (auto v: _profiles) {
|
||||||
v->printOut();
|
v->printOut();
|
||||||
|
sorted.emplace_back(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_profiles.size() > 1) {
|
||||||
|
// building hot spots
|
||||||
|
std::sort(sorted.begin(), sorted.end(), [](const NodeProfile *a, const NodeProfile *b) -> bool {
|
||||||
|
return a->getExecutionTime() > b->getExecutionTime();
|
||||||
|
});
|
||||||
|
|
||||||
|
nd4j_printf("\nTop 30 reports by EXEC:\n", "");
|
||||||
|
auto limit = nd4j::math::nd4j_min<int>(30, sorted.size());
|
||||||
|
for (int e = 0; e < limit; e++) {
|
||||||
|
sorted[e]->printOut();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nd4j_printf("\nSpecial timers:\n", "");
|
nd4j_printf("\nSpecial timers:\n", "");
|
||||||
if (_timings.empty())
|
if (_timings.empty())
|
||||||
|
|
|
@ -32,7 +32,7 @@ namespace nd4j {
|
||||||
// graph->printOut();
|
// graph->printOut();
|
||||||
|
|
||||||
// warm up
|
// warm up
|
||||||
for (int e = 0; e < 1000; e++) {
|
for (int e = 0; e < iterations; e++) {
|
||||||
FlowPath fp;
|
FlowPath fp;
|
||||||
|
|
||||||
auto _vs = varSpace->clone();
|
auto _vs = varSpace->clone();
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
#include <helpers/logger.h>
|
#include <helpers/logger.h>
|
||||||
#include <graph/profiling/NodeProfile.h>
|
#include <graph/profiling/NodeProfile.h>
|
||||||
|
#include <helpers/ShapeUtils.h>
|
||||||
|
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace graph {
|
namespace graph {
|
||||||
|
@ -35,9 +36,23 @@ namespace nd4j {
|
||||||
nd4j_printf(" Memory: ACT: %lld; TMP: %lld; OBJ: %lld; TTL: %lld;\n", _memoryActivations / _merges, _memoryTemporary / _merges, _memoryObjects / _merges, _memoryTotal / _merges);
|
nd4j_printf(" Memory: ACT: %lld; TMP: %lld; OBJ: %lld; TTL: %lld;\n", _memoryActivations / _merges, _memoryTemporary / _merges, _memoryObjects / _merges, _memoryTotal / _merges);
|
||||||
nd4j_printf(" Time: PREP: %lld ns; EXEC: %lld ns; TTL: %lld ns;\n", _preparationTime / _merges, _executionTime / _merges, _totalTime / _merges);
|
nd4j_printf(" Time: PREP: %lld ns; EXEC: %lld ns; TTL: %lld ns;\n", _preparationTime / _merges, _executionTime / _merges, _totalTime / _merges);
|
||||||
nd4j_printf(" PREP: INPUT: %lld ns; SHAPE: %lld ns; ARRAY: %lld ns;\n", _inputTime / _merges, _shapeTime / _merges, _arrayTime / _merges);
|
nd4j_printf(" PREP: INPUT: %lld ns; SHAPE: %lld ns; ARRAY: %lld ns;\n", _inputTime / _merges, _shapeTime / _merges, _arrayTime / _merges);
|
||||||
|
|
||||||
|
std::string inputs;
|
||||||
|
std::string outputs;
|
||||||
|
|
||||||
|
int cnt = 0;
|
||||||
|
for (const auto &v: _inputShapes)
|
||||||
|
inputs += v + " ";
|
||||||
|
|
||||||
|
for (const auto &v: _outputShapes)
|
||||||
|
outputs += v + " ";
|
||||||
|
|
||||||
|
|
||||||
|
nd4j_printf(" Inputs: %s\n", inputs.c_str());
|
||||||
|
nd4j_printf(" Outputs: %s\n", outputs.c_str());
|
||||||
};
|
};
|
||||||
|
|
||||||
Nd4jLong NodeProfile::getActivationsSize() {
|
Nd4jLong NodeProfile::getActivationsSize() const {
|
||||||
return _memoryActivations;
|
return _memoryActivations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,15 +68,15 @@ namespace nd4j {
|
||||||
_inputTime = time;
|
_inputTime = time;
|
||||||
}
|
}
|
||||||
|
|
||||||
Nd4jLong NodeProfile::getTemporarySize() {
|
Nd4jLong NodeProfile::getTemporarySize() const{
|
||||||
return _memoryTemporary;
|
return _memoryTemporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
Nd4jLong NodeProfile::getObjectsSize() {
|
Nd4jLong NodeProfile::getObjectsSize() const{
|
||||||
return _memoryObjects;
|
return _memoryObjects;
|
||||||
}
|
}
|
||||||
|
|
||||||
Nd4jLong NodeProfile::getTotalSize() {
|
Nd4jLong NodeProfile::getTotalSize() const{
|
||||||
return _memoryTotal;
|
return _memoryTotal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,6 +112,18 @@ namespace nd4j {
|
||||||
_memoryTotal = bytes;
|
_memoryTotal = bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Nd4jLong NodeProfile::getExecutionTime() const {
|
||||||
|
return _executionTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
void NodeProfile::addInputShape(Nd4jLong *shapeInfo) {
|
||||||
|
_inputShapes.emplace_back(ShapeUtils::shapeAsString(shapeInfo));
|
||||||
|
}
|
||||||
|
|
||||||
|
void NodeProfile::addOutputShape(Nd4jLong *shapeInfo) {
|
||||||
|
_outputShapes.emplace_back(ShapeUtils::shapeAsString(shapeInfo));
|
||||||
|
}
|
||||||
|
|
||||||
void NodeProfile::merge(NodeProfile *other) {
|
void NodeProfile::merge(NodeProfile *other) {
|
||||||
_merges += other->_merges;
|
_merges += other->_merges;
|
||||||
_memoryObjects += other->_memoryObjects;
|
_memoryObjects += other->_memoryObjects;
|
||||||
|
@ -110,6 +137,9 @@ namespace nd4j {
|
||||||
_shapeTime += other->_shapeTime;
|
_shapeTime += other->_shapeTime;
|
||||||
_arrayTime += other->_arrayTime;
|
_arrayTime += other->_arrayTime;
|
||||||
_inputTime += other->_inputTime;
|
_inputTime += other->_inputTime;
|
||||||
|
|
||||||
|
_inputShapes = other->_inputShapes;
|
||||||
|
_outputShapes = other->_outputShapes;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string& NodeProfile::name() {
|
std::string& NodeProfile::name() {
|
||||||
|
@ -129,6 +159,9 @@ namespace nd4j {
|
||||||
_shapeTime = other->_shapeTime;
|
_shapeTime = other->_shapeTime;
|
||||||
_arrayTime = other->_arrayTime;
|
_arrayTime = other->_arrayTime;
|
||||||
_inputTime = other->_inputTime;
|
_inputTime = other->_inputTime;
|
||||||
|
|
||||||
|
_inputShapes = other->_inputShapes;
|
||||||
|
_outputShapes = other->_outputShapes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -147,6 +147,9 @@ namespace nd4j {
|
||||||
// returns TRUE if this op allows in-place execution
|
// returns TRUE if this op allows in-place execution
|
||||||
bool allowsInplace();
|
bool allowsInplace();
|
||||||
|
|
||||||
|
// this method allows you to enable/disable inplace call for a given op
|
||||||
|
void allowInplace(bool reallyAllow);
|
||||||
|
|
||||||
// this method returns opNum (applicable for legacy XYZ ops only)
|
// this method returns opNum (applicable for legacy XYZ ops only)
|
||||||
int getOpNum();
|
int getOpNum();
|
||||||
|
|
||||||
|
|
|
@ -27,12 +27,10 @@ namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
OP_IMPL(identity, 1, 1, true) {
|
OP_IMPL(identity, 1, 1, true) {
|
||||||
auto first = INPUT_VARIABLE(0);
|
auto first = INPUT_VARIABLE(0);
|
||||||
auto z = this->getZ(block);
|
auto z = OUTPUT_VARIABLE(0);
|
||||||
|
|
||||||
// just for lulz
|
if (!block.isInplace())
|
||||||
first->applyTransform(nd4j::transform::Identity, *z);
|
first->applyTransform(nd4j::transform::Identity, *z);
|
||||||
|
|
||||||
STORE_RESULT(*z);
|
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
@ -60,8 +58,8 @@ namespace nd4j {
|
||||||
DECLARE_TYPES(identity_bp) {
|
DECLARE_TYPES(identity_bp) {
|
||||||
getOpDescriptor()
|
getOpDescriptor()
|
||||||
->setAllowedInputTypes(0, DataType::ANY)
|
->setAllowedInputTypes(0, DataType::ANY)
|
||||||
->setAllowedInputTypes(1, {DataType::FLOAT32, DataType ::DOUBLE, DataType::HALF})
|
->setAllowedInputTypes(1, {ALL_FLOATS})
|
||||||
->setAllowedOutputTypes(0, {DataType::FLOAT32, DataType ::DOUBLE, DataType::HALF});
|
->setAllowedOutputTypes(0, {ALL_FLOATS});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,22 +31,17 @@ namespace nd4j {
|
||||||
REQUIRE_TRUE(w->isMatrix(), 0, "relu_layer: weights argument should be a 2D tensor, but got rank %i instead!", w->rankOf());
|
REQUIRE_TRUE(w->isMatrix(), 0, "relu_layer: weights argument should be a 2D tensor, but got rank %i instead!", w->rankOf());
|
||||||
REQUIRE_TRUE(b->isVector(), 0, "relu_layer: biases argument should be a 1D tensor, but got rank %i instead!", b->rankOf());
|
REQUIRE_TRUE(b->isVector(), 0, "relu_layer: biases argument should be a 1D tensor, but got rank %i instead!", b->rankOf());
|
||||||
REQUIRE_TRUE(b->lengthOf() == w->sizeAt(1), 0, "relu_layer: biases array length should match to columns of weights matrix, however got length = %i and columns = %i!", b->lengthOf(), w->sizeAt(1));
|
REQUIRE_TRUE(b->lengthOf() == w->sizeAt(1), 0, "relu_layer: biases array length should match to columns of weights matrix, however got length = %i and columns = %i!", b->lengthOf(), w->sizeAt(1));
|
||||||
REQUIRE_TRUE(x->sizeAt(1) == w->sizeAt(0), 0, "relu_layer: number of x columns should match to row number of weights matrix, but got x_columns = %i and weights_rows = %i!",
|
REQUIRE_TRUE(x->sizeAt(1) == w->sizeAt(0), 0, "relu_layer: number of x columns should match to row number of weights matrix, but got x_columns = %i and weights_rows = %i!", x->sizeAt(1), w->sizeAt(0));
|
||||||
x->sizeAt(1), w->sizeAt(0));
|
|
||||||
|
|
||||||
|
|
||||||
auto output = OUTPUT_VARIABLE(0);
|
auto output = OUTPUT_VARIABLE(0);
|
||||||
//T bound = (T)0.f;
|
|
||||||
//nd4j_printf("Matrix x(%ix%i), Matrix w(%ix%i), b(1x%i)\n", x->sizeAt(0), x->sizeAt(1), w->sizeAt(0), w->sizeAt(1), b->lengthOf());
|
|
||||||
|
|
||||||
nd4j::ops::xw_plus_b op;
|
nd4j::ops::xw_plus_b op;
|
||||||
std::unique_ptr<ResultSet> result(op.evaluate({x, w, b}));
|
auto status = op.execute({x, w, b}, {output});
|
||||||
REQUIRE_TRUE(Status::OK() == result->status(), 0, "relu_layer: xw_plus_b op failed on input data.");
|
REQUIRE_TRUE(Status::OK() == status, 0, "relu_layer: xw_plus_b op failed on input data.");
|
||||||
|
|
||||||
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;
|
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;
|
||||||
|
|
||||||
auto xw = result->at(0);
|
output->applyScalar(nd4j::scalar::RELU, scalar, *output);
|
||||||
xw->applyScalar(nd4j::scalar::RELU, scalar, *output);
|
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ namespace nd4j {
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// here iArgs is a vector with (optional) negative of order as first element:
|
// here iArgs is a vector with (optional) negative of order as first element:
|
||||||
// ({-order, dim1, dim2, dim3, ...})
|
// ({-order, dim1, dim2, dim3, ...})
|
||||||
CUSTOM_OP_IMPL(reshape, 1, 1, true, 0, -2) {
|
CUSTOM_OP_IMPL(reshape, 1, 1, false, 0, -2) {
|
||||||
auto x = INPUT_VARIABLE(0);
|
auto x = INPUT_VARIABLE(0);
|
||||||
|
|
||||||
if (block.width() == 1) {
|
if (block.width() == 1) {
|
||||||
|
|
|
@ -28,7 +28,7 @@ namespace nd4j {
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
CUSTOM_OP_IMPL(reshapeas, 2, 1, true, 0, 0) {
|
CUSTOM_OP_IMPL(reshapeas, 2, 1, false, 0, 0) {
|
||||||
|
|
||||||
auto x = INPUT_VARIABLE(0);
|
auto x = INPUT_VARIABLE(0);
|
||||||
auto y = INPUT_VARIABLE(1);
|
auto y = INPUT_VARIABLE(1);
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
CUSTOM_OP_IMPL(tile_to_shape, 1, 1, true, 0, -1) {
|
CUSTOM_OP_IMPL(tile_to_shape, 1, 1, false, 0, -1) {
|
||||||
|
|
||||||
auto input = INPUT_VARIABLE(0);
|
auto input = INPUT_VARIABLE(0);
|
||||||
auto output = OUTPUT_VARIABLE(0);
|
auto output = OUTPUT_VARIABLE(0);
|
||||||
|
|
|
@ -28,7 +28,7 @@ namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
CUSTOM_OP_IMPL(transpose, 1, 1, true, 0, 0) {
|
CUSTOM_OP_IMPL(transpose, 1, 1, false, 0, 0) {
|
||||||
auto x = INPUT_VARIABLE(0);
|
auto x = INPUT_VARIABLE(0);
|
||||||
if (block.width() == 1) {
|
if (block.width() == 1) {
|
||||||
if (block.isInplace()) {
|
if (block.isInplace()) {
|
||||||
|
|
|
@ -26,15 +26,15 @@
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
#if NOT_EXCLUDED(OP_permute)
|
#if NOT_EXCLUDED(OP_permute)
|
||||||
DECLARE_CUSTOM_OP(permute, 1, 1, true, 0, -2);
|
DECLARE_CUSTOM_OP(permute, 1, 1, false, 0, -2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_reshapeas)
|
#if NOT_EXCLUDED(OP_reshapeas)
|
||||||
DECLARE_CUSTOM_OP(reshapeas, 2, 1, true, 0, 0);
|
DECLARE_CUSTOM_OP(reshapeas, 2, 1, false, 0, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_transpose)
|
#if NOT_EXCLUDED(OP_transpose)
|
||||||
DECLARE_CUSTOM_OP(transpose, 1, 1, true, 0, 0);
|
DECLARE_CUSTOM_OP(transpose, 1, 1, false, 0, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_shape_of)
|
#if NOT_EXCLUDED(OP_shape_of)
|
||||||
|
@ -46,7 +46,7 @@ namespace nd4j {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_squeeze)
|
#if NOT_EXCLUDED(OP_squeeze)
|
||||||
DECLARE_CUSTOM_OP(squeeze, 1, 1, true, 0, -2);
|
DECLARE_CUSTOM_OP(squeeze, 1, 1, false, 0, -2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_expand_dims)
|
#if NOT_EXCLUDED(OP_expand_dims)
|
||||||
|
@ -54,11 +54,11 @@ namespace nd4j {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_reshape)
|
#if NOT_EXCLUDED(OP_reshape)
|
||||||
DECLARE_CUSTOM_OP(reshape, 1, 1, true, 0, -2);
|
DECLARE_CUSTOM_OP(reshape, 1, 1, false, 0, -2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOT_EXCLUDED(OP_size_at)
|
#if NOT_EXCLUDED(OP_size_at)
|
||||||
DECLARE_CUSTOM_OP(size_at, 1, 1, true, 0, 1);
|
DECLARE_CUSTOM_OP(size_at, 1, 1, false, 0, 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -80,8 +80,8 @@ namespace nd4j {
|
||||||
* @tparam T
|
* @tparam T
|
||||||
*/
|
*/
|
||||||
#if NOT_EXCLUDED(OP_tile_to_shape)
|
#if NOT_EXCLUDED(OP_tile_to_shape)
|
||||||
DECLARE_CUSTOM_OP(tile_to_shape, 1, 1, true, 0, -1);
|
DECLARE_CUSTOM_OP(tile_to_shape, 1, 1, false, 0, -1);
|
||||||
DECLARE_CUSTOM_OP(tile_to_shape_bp, 2, 1, true, 0, -1);
|
DECLARE_CUSTOM_OP(tile_to_shape_bp, 2, 1, false, 0, -1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -150,6 +150,22 @@ namespace nd4j {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.isInplace()) {
|
if (ctx.isInplace()) {
|
||||||
|
if (Environment::getInstance()->isProfiling() && node != nullptr) {
|
||||||
|
if (ctx.isFastPath()) {
|
||||||
|
//
|
||||||
|
} else {
|
||||||
|
for (auto p: *ctx.inputs()) {
|
||||||
|
auto var = ctx.variable(p);
|
||||||
|
if (var->variableType() == VariableType::NDARRAY) {
|
||||||
|
NDArray *array = var->getNDArray();
|
||||||
|
|
||||||
|
node->addInputShape(array->shapeInfo());
|
||||||
|
node->addOutputShape(array->shapeInfo());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// do nothing, getZ result will do the trick
|
// do nothing, getZ result will do the trick
|
||||||
return static_cast<int>(ctx.width());
|
return static_cast<int>(ctx.width());
|
||||||
} else {
|
} else {
|
||||||
|
@ -192,6 +208,10 @@ namespace nd4j {
|
||||||
auto inputTime = std::chrono::duration_cast<std::chrono::nanoseconds>(inputEnd - inputStart).count();
|
auto inputTime = std::chrono::duration_cast<std::chrono::nanoseconds>(inputEnd - inputStart).count();
|
||||||
node->setInputTime(inputTime);
|
node->setInputTime(inputTime);
|
||||||
|
|
||||||
|
// saving input shapes in profile
|
||||||
|
for (int e = 0; e < inSha.size(); e++)
|
||||||
|
node->addInputShape(inSha.at(e));
|
||||||
|
|
||||||
shapeStart = std::chrono::system_clock::now();
|
shapeStart = std::chrono::system_clock::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,6 +224,10 @@ namespace nd4j {
|
||||||
auto prepTime = std::chrono::duration_cast<std::chrono::nanoseconds>(shapeEnd - shapeStart).count();
|
auto prepTime = std::chrono::duration_cast<std::chrono::nanoseconds>(shapeEnd - shapeStart).count();
|
||||||
node->setShapeFunctionTime(prepTime);
|
node->setShapeFunctionTime(prepTime);
|
||||||
|
|
||||||
|
// saving output shapes in profile
|
||||||
|
for (int e = 0; e < outSha->size(); e++)
|
||||||
|
node->addOutputShape(outSha->at(e));
|
||||||
|
|
||||||
arrayStart = std::chrono::system_clock::now();
|
arrayStart = std::chrono::system_clock::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -562,7 +586,7 @@ namespace nd4j {
|
||||||
block->setInnerTime(outerTime);
|
block->setInnerTime(outerTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Environment::getInstance()->isProfiling()) {
|
if (Environment::getInstance()->isProfiling() && !block->isFastPath()) {
|
||||||
auto fp = block->getVariableSpace()->flowPath();
|
auto fp = block->getVariableSpace()->flowPath();
|
||||||
if (fp != nullptr) {
|
if (fp != nullptr) {
|
||||||
auto p = fp->profile();
|
auto p = fp->profile();
|
||||||
|
|
|
@ -23,11 +23,11 @@
|
||||||
|
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
LegacyOp::LegacyOp(int numInputs) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", true) {
|
LegacyOp::LegacyOp(int numInputs) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", false) {
|
||||||
_numInputs = numInputs;
|
_numInputs = numInputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyOp::LegacyOp(int numInputs, int opNum) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", true) {
|
LegacyOp::LegacyOp(int numInputs, int opNum) : DeclarableOp::DeclarableOp(numInputs , 1, "LegacyOp", false) {
|
||||||
_opNum = opNum;
|
_opNum = opNum;
|
||||||
_numInputs = numInputs;
|
_numInputs = numInputs;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,11 +25,11 @@
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
LegacyPairwiseTransformOp::LegacyPairwiseTransformOp() : LegacyOp::LegacyOp(2) {
|
LegacyPairwiseTransformOp::LegacyPairwiseTransformOp() : LegacyOp::LegacyOp(2) {
|
||||||
// just a no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyPairwiseTransformOp::LegacyPairwiseTransformOp(int opNum) : LegacyOp::LegacyOp(2, opNum) {
|
LegacyPairwiseTransformOp::LegacyPairwiseTransformOp(int opNum) : LegacyOp::LegacyOp(2, opNum) {
|
||||||
// just a no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyOp* LegacyPairwiseTransformOp::clone() {
|
LegacyOp* LegacyPairwiseTransformOp::clone() {
|
||||||
|
|
|
@ -26,11 +26,11 @@
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
LegacyScalarOp::LegacyScalarOp() : LegacyOp::LegacyOp(1) {
|
LegacyScalarOp::LegacyScalarOp() : LegacyOp::LegacyOp(1) {
|
||||||
// no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyScalarOp::LegacyScalarOp(int opNum) : LegacyOp::LegacyOp(1, opNum){
|
LegacyScalarOp::LegacyScalarOp(int opNum) : LegacyOp::LegacyOp(1, opNum){
|
||||||
// no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyOp* LegacyScalarOp::clone() {
|
LegacyOp* LegacyScalarOp::clone() {
|
||||||
|
@ -66,6 +66,7 @@ namespace nd4j {
|
||||||
|
|
||||||
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType()));
|
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType()));
|
||||||
|
|
||||||
|
NDArray::registerSpecialUse({z}, {x, y});
|
||||||
} else if (block.getTArguments()->size() > 0) {
|
} else if (block.getTArguments()->size() > 0) {
|
||||||
auto y = NDArrayFactory::create(x->dataType(), T_ARG(0), block.launchContext());
|
auto y = NDArrayFactory::create(x->dataType(), T_ARG(0), block.launchContext());
|
||||||
|
|
||||||
|
@ -78,10 +79,9 @@ namespace nd4j {
|
||||||
NDArray::prepareSpecialUse({z}, {x, _scalar});
|
NDArray::prepareSpecialUse({z}, {x, _scalar});
|
||||||
|
|
||||||
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType()));
|
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType()));
|
||||||
}
|
|
||||||
|
|
||||||
manager.synchronize();
|
NDArray::registerSpecialUse({z}, {x, _scalar});
|
||||||
STORE_RESULT(*z);
|
}
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,11 +26,11 @@
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
LegacyTransformSameOp::LegacyTransformSameOp() : LegacyOp::LegacyOp(1) {
|
LegacyTransformSameOp::LegacyTransformSameOp() : LegacyOp::LegacyOp(1) {
|
||||||
// just a no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyTransformSameOp::LegacyTransformSameOp(int opNum) : LegacyOp::LegacyOp(1, opNum) {
|
LegacyTransformSameOp::LegacyTransformSameOp(int opNum) : LegacyOp::LegacyOp(1, opNum) {
|
||||||
// just a no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyOp* LegacyTransformSameOp::clone() {
|
LegacyOp* LegacyTransformSameOp::clone() {
|
||||||
|
|
|
@ -26,11 +26,11 @@
|
||||||
namespace nd4j {
|
namespace nd4j {
|
||||||
namespace ops {
|
namespace ops {
|
||||||
LegacyTransformStrictOp::LegacyTransformStrictOp() : LegacyOp::LegacyOp(1) {
|
LegacyTransformStrictOp::LegacyTransformStrictOp() : LegacyOp::LegacyOp(1) {
|
||||||
// just a no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyTransformStrictOp::LegacyTransformStrictOp(int opNum) : LegacyOp::LegacyOp(1, opNum) {
|
LegacyTransformStrictOp::LegacyTransformStrictOp(int opNum) : LegacyOp::LegacyOp(1, opNum) {
|
||||||
// just a no-op
|
this->getOpDescriptor()->allowInplace(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LegacyOp* LegacyTransformStrictOp::clone() {
|
LegacyOp* LegacyTransformStrictOp::clone() {
|
||||||
|
|
|
@ -50,6 +50,9 @@ namespace nd4j {
|
||||||
_scalar = isScalar;
|
_scalar = isScalar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OpDescriptor::allowInplace(bool reallyAllow){
|
||||||
|
_allowsInplace = reallyAllow;
|
||||||
|
}
|
||||||
|
|
||||||
bool OpDescriptor::operator==(const OpDescriptor& other) const {
|
bool OpDescriptor::operator==(const OpDescriptor& other) const {
|
||||||
if (_hash == -1 && other._hash == -1)
|
if (_hash == -1 && other._hash == -1)
|
||||||
|
|
|
@ -52,7 +52,7 @@ elseif(WIN32)
|
||||||
set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2")
|
set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2")
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||||
set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2")
|
set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2")
|
||||||
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*")
|
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*")
|
||||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native")
|
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native")
|
||||||
|
|
|
@ -3087,6 +3087,10 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_03_3) {
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) {
|
TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
NDArray x = NDArrayFactory::create<float>('c', {2,4,5,3});
|
NDArray x = NDArrayFactory::create<float>('c', {2,4,5,3});
|
||||||
NDArray exp = NDArrayFactory::create<float>('c', {2,4,5,3},{
|
NDArray exp = NDArrayFactory::create<float>('c', {2,4,5,3},{
|
||||||
|
|
|
@ -78,6 +78,11 @@ TEST_F(DeclarableOpsTests14, Test_Inf_Comparison_1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DeclarableOpsTests14, Test_Inf_Comparison_2) {
|
TEST_F(DeclarableOpsTests14, Test_Inf_Comparison_2) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
auto x = NDArrayFactory::create<double>('c', {5}, {1, 2, 3, std::numeric_limits<double>::infinity(), 5});
|
auto x = NDArrayFactory::create<double>('c', {5}, {1, 2, 3, std::numeric_limits<double>::infinity(), 5});
|
||||||
auto y = NDArrayFactory::create<double>('c', {5}, {1, 2, 3, -std::numeric_limits<double>::infinity(), 5});
|
auto y = NDArrayFactory::create<double>('c', {5}, {1, 2, 3, -std::numeric_limits<double>::infinity(), 5});
|
||||||
|
|
||||||
|
@ -332,6 +337,10 @@ TEST_F(DeclarableOpsTests14, test_empty_reduce_max_1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DeclarableOpsTests14, test_empty_reduce_sum_1) {
|
TEST_F(DeclarableOpsTests14, test_empty_reduce_sum_1) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
auto e = NDArrayFactory::create<float>('c', {1, 0});
|
auto e = NDArrayFactory::create<float>('c', {1, 0});
|
||||||
nd4j::ops::reduce_sum sumOp;
|
nd4j::ops::reduce_sum sumOp;
|
||||||
|
@ -343,6 +352,10 @@ TEST_F(DeclarableOpsTests14, test_empty_reduce_sum_1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DeclarableOpsTests14, test_empty_reduce_mean_1) {
|
TEST_F(DeclarableOpsTests14, test_empty_reduce_mean_1) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
auto e = NDArrayFactory::create<float>('c', {1, 0});
|
auto e = NDArrayFactory::create<float>('c', {1, 0});
|
||||||
nd4j::ops::reduce_mean sumOp;
|
nd4j::ops::reduce_mean sumOp;
|
||||||
|
|
|
@ -584,6 +584,11 @@ TEST_F(DeclarableOpsTests15, test_check_numeric_1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DeclarableOpsTests15, test_check_numeric_2) {
|
TEST_F(DeclarableOpsTests15, test_check_numeric_2) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
auto x = NDArrayFactory::create<float>('c', {3},{1.f, 2.f, std::numeric_limits<float>::infinity()});
|
auto x = NDArrayFactory::create<float>('c', {3},{1.f, 2.f, std::numeric_limits<float>::infinity()});
|
||||||
auto y = NDArrayFactory::string("should trigger");
|
auto y = NDArrayFactory::string("should trigger");
|
||||||
auto z = NDArrayFactory::create<float>('c', {3} );
|
auto z = NDArrayFactory::create<float>('c', {3} );
|
||||||
|
@ -598,6 +603,11 @@ TEST_F(DeclarableOpsTests15, test_check_numeric_2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DeclarableOpsTests15, test_check_numeric_3) {
|
TEST_F(DeclarableOpsTests15, test_check_numeric_3) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
auto x = NDArrayFactory::create<float>('c', {3},{1.f, 2.f, std::numeric_limits<float>::quiet_NaN()});
|
auto x = NDArrayFactory::create<float>('c', {3},{1.f, 2.f, std::numeric_limits<float>::quiet_NaN()});
|
||||||
auto y = NDArrayFactory::string("should trigger");
|
auto y = NDArrayFactory::string("should trigger");
|
||||||
auto z = NDArrayFactory::create<float>('c', {3} );
|
auto z = NDArrayFactory::create<float>('c', {3} );
|
||||||
|
@ -1530,6 +1540,10 @@ TEST_F(DeclarableOpsTests15, Pow_BP_Test10) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DeclarableOpsTests15, Pow_BP_Test11) {
|
TEST_F(DeclarableOpsTests15, Pow_BP_Test11) {
|
||||||
|
#ifdef FFAST_MATH
|
||||||
|
if (1 > 0)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
NDArray xB('c', { 3,2,1 }, { .4, 3, 5, .8, -9, -12 }, nd4j::DataType::FLOAT32);
|
NDArray xB('c', { 3,2,1 }, { .4, 3, 5, .8, -9, -12 }, nd4j::DataType::FLOAT32);
|
||||||
NDArray yB('c', { 1,2,3 }, { 3, -2, .4, -4, 10, .8 }, nd4j::DataType::FLOAT32);
|
NDArray yB('c', { 1,2,3 }, { 3, -2, .4, -4, 10, .8 }, nd4j::DataType::FLOAT32);
|
||||||
|
|
|
@ -65,6 +65,110 @@ TEST_F(PlaygroundTests, test_avx) {
|
||||||
nd4j_printf("Optimal level: %i; Binary level: %i;\n", ::optimalLevel(), ::binaryLevel());
|
nd4j_printf("Optimal level: %i; Binary level: %i;\n", ::optimalLevel(), ::binaryLevel());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TEST_F(PlaygroundTests, test_bert_1) {
|
||||||
|
// this test will run ONLY if this model exists
|
||||||
|
if (nd4j::graph::getFileSize("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model.fb") < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto graph = GraphExecutioner::importFromFlatBuffers("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model.fb");
|
||||||
|
|
||||||
|
auto t = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext.numpy");
|
||||||
|
auto u = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext_1.numpy");
|
||||||
|
auto v = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_input_IteratorGetNext_4.numpy");
|
||||||
|
auto z = NDArrayFactory::fromNpyFile("/home/raver119/Downloads/Bert_minimal_model/bert_minimal_model_output.numpy");
|
||||||
|
|
||||||
|
//graph->printOut();
|
||||||
|
|
||||||
|
graph->tagInplaceNodes();
|
||||||
|
|
||||||
|
graph->getVariableSpace()->putVariable(85,0, t);
|
||||||
|
graph->getVariableSpace()->putVariable(86,0, u);
|
||||||
|
graph->getVariableSpace()->putVariable(87,0, v);
|
||||||
|
|
||||||
|
/*
|
||||||
|
// validating graph now
|
||||||
|
auto status = GraphExecutioner::execute(graph);
|
||||||
|
ASSERT_EQ(Status::OK(), status);
|
||||||
|
ASSERT_TRUE(graph->getVariableSpace()->hasVariable(198));
|
||||||
|
|
||||||
|
auto array = graph->getVariableSpace()->getVariable(198)->getNDArray();
|
||||||
|
ASSERT_EQ(z, *array);
|
||||||
|
*/
|
||||||
|
|
||||||
|
nd4j::Environment::getInstance()->setProfiling(true);
|
||||||
|
auto profile = GraphProfilingHelper::profile(graph, 1);
|
||||||
|
|
||||||
|
profile->printOut();
|
||||||
|
|
||||||
|
nd4j::Environment::getInstance()->setProfiling(false);
|
||||||
|
delete profile;
|
||||||
|
|
||||||
|
/*
|
||||||
|
std::vector<Nd4jLong> values;
|
||||||
|
|
||||||
|
for (int e = 0; e < 1; e++) {
|
||||||
|
auto timeStart = std::chrono::system_clock::now();
|
||||||
|
|
||||||
|
GraphExecutioner::execute(graph);
|
||||||
|
|
||||||
|
auto timeEnd = std::chrono::system_clock::now();
|
||||||
|
auto outerTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeStart).count();
|
||||||
|
values.emplace_back(outerTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(values.begin(), values.end());
|
||||||
|
|
||||||
|
nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
|
||||||
|
*/
|
||||||
|
|
||||||
|
delete graph;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
TEST_F(PlaygroundTests, test_broadcast_1) {
|
||||||
|
int pool = 10;
|
||||||
|
std::vector<NDArray*> aX(pool);
|
||||||
|
std::vector<NDArray*> aY(pool);
|
||||||
|
std::vector<NDArray*> aZ(pool);
|
||||||
|
|
||||||
|
for (int e = 0; e < pool; e++) {
|
||||||
|
aX[e] = NDArrayFactory::create_<float>('c', {64, 128, 1});
|
||||||
|
aY[e] = NDArrayFactory::create_<float>('c', {768});
|
||||||
|
aZ[e] = NDArrayFactory::create_<float>('c', {64, 128, 768});
|
||||||
|
|
||||||
|
aX[e]->assign(119 * (e+1));
|
||||||
|
aY[e]->assign(119 * (e+3));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Nd4jLong> values;
|
||||||
|
|
||||||
|
for (int e = 0; e < 1000; e++) {
|
||||||
|
auto x = aX[e < pool ? e : e % pool];
|
||||||
|
auto y = aY[e < pool ? e : e % pool];
|
||||||
|
auto z = aZ[e < pool ? e : e % pool];
|
||||||
|
|
||||||
|
auto timeStart = std::chrono::system_clock::now();
|
||||||
|
|
||||||
|
x->applyTrueBroadcast(BroadcastOpsTuple::Multiply(), *y, *z);
|
||||||
|
|
||||||
|
auto timeEnd = std::chrono::system_clock::now();
|
||||||
|
auto outerTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeStart).count();
|
||||||
|
values.emplace_back(outerTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(values.begin(), values.end());
|
||||||
|
|
||||||
|
nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
|
||||||
|
|
||||||
|
for (int e = 0; e < pool; e++) {
|
||||||
|
delete aX[e];
|
||||||
|
delete aY[e];
|
||||||
|
delete aZ[e];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
TEST_F(PlaygroundTests, test_s_0) {
|
TEST_F(PlaygroundTests, test_s_0) {
|
||||||
|
|
Loading…
Reference in New Issue