Shyrma temp (#131)
* specify template instantiation for certain types in float16 and bfloat16
* polish bfloat16 and float16 member-function template specializations
* rewrite and overload array +-*/ scalar and scalar +-*/ array in the NDArray class
* corrections related to rvalue/lvalue conversions
* provide move semantics in NDArray operators array +-*/ array
* float16/bfloat16 tweaks
* one more tweak
* make float16 and bfloat16 compile successfully on cuda
* do not use resources of view-like arrays when move semantics is applied
* get rid of pointers in signatures of NDArray methods 1 (see the call-site sketch below)
* signature correction of NDArray::dup method
* signature correction of NDArray::reduceAlongDimension method
* signature correction of NDArray::applyIndexReduce and applyTrueBroadcast methods
* signature correction of NDArray::applyReduce3 and varianceAlongDimension methods
* signature correction of NDArray::tensorsAlongDimension and diagonal methods
* signature correction of NDArray::allTensorsAlongDimension
* signature correction of NDArray::reduceAlongDimension 2
* signature correction of NDArray::applyTransform 2
* signature correction of NDArray::applyPairwiseTransform 2
* signature correction of NDArray::applyBroadcast 2
* signature correction of NDArray::applyTrueBroadcast 2
* signature correction of NDArray::applyScalar and applyScalarArr
* signature correction of NDArray::lambda methods
* signature correction of NDArray::reduce3 methods 2
* signature correction of the following NDArray methods: add/sub/mul/div row/column and fillAsTriangular
* signature correction of NDArray::tileToShape methods
* signature correction of NDArray::isShapeSameStrict method
* minor corrections in tests
* replace reduce op in batchnorm mkldnn
* add explicit template instantiations for operator+(NDArray&&, const scalar)
* corrections of casts in float16/bfloat16
* provide move semantics in the following NDArray methods: transform, applyTrueBroadcast, transpose, reshape, permute
* get rid of input array A duplicate in svd cuda op
* work around a known bug in the svd cuda API
* add temporary global memory buffer in svd cuda when calcUV = false and m != n
* remove test with bfloat16 type for betainc
* resolve conflicts after master has been merged in
* change type of affected input array in fused_batch_norm
* add several explicit type castings
* add ND4J_EXPORT to operators
* add explicit template types in instantiations of template arithmetic operators of the NDArray class
* one more test fix

Signed-off-by: Yurii <iuriish@yahoo.com>
Signed-off-by: raver119 <raver119@gmail.com>
Co-authored-by: raver119 <raver119@gmail.com>
parent 3e0afadea1
commit 5d9b2a16e5

Two file diffs are suppressed because they are too large.
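Most of the hunks below follow one pattern: NDArray methods that used to take optional operand/output pointers now take references, and methods such as dup() and repeat() return an NDArray by value so the new move semantics can be used. The call-site sketch referenced above follows; it is a hedged illustration only (the headers, the NDArrayFactory::create helper, and the shapes and values are assumptions taken from the usual libnd4j test style, not part of this diff):

    #include <NDArray.h>
    #include <NDArrayFactory.h>

    using namespace nd4j;

    void callSiteSketch() {
        auto x = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});

        // old: NDArray* copy = x.dup();           // caller owned and deleted the pointer
        NDArray copy = x.dup();                    // new: returned by value
        NDArray* heapCopy = new NDArray(x.dup());  // wrap only where heap ownership is needed

        // scalar/array arithmetic operators are overloaded for lvalues and rvalues;
        // per the commit notes, the rvalue overloads (e.g. operator+(NDArray&&, const scalar))
        // reuse the temporary's buffer instead of allocating a new one
        NDArray y = (x + 2.f) * 0.5f;

        delete heapCopy;
    }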
@@ -133,7 +133,7 @@ namespace graph {
 if (variableSpace->hasVariable(v->getName())) {
 // symbolic feeder
 auto array = variableSpace->getVariable(v->getName())->getNDArray();
-auto vr = array->dup();
+auto vr = new NDArray(array->dup());
 // deletables.push_back(vr);
 v->setNDArray(vr);
 } else {

@@ -145,7 +145,7 @@ namespace graph {
 // if we're not using symbolic lookup - we'll use sequential approach then
 auto p = node->input()->at(cnt);
 auto array = variableSpace->getVariable(p)->getNDArray();
-auto vr = array->dup();
+auto vr = new NDArray(array->dup());
 //deletables.push_back(vr);
 v->setNDArray(vr);
 }
@@ -71,44 +71,41 @@ void NDArray::makeBothBuffersActual() const { }
 ////////////////////////////////////////////////////////////////////////
 template <typename T>
-void NDArray::fillAsTriangular(const float val, int lower, int upper, const char direction, NDArray* target) {
+void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& target, const char direction) {
 if (isS())
 throw std::runtime_error("NDArray::fillArrayAsTriangular: you can't use this method on String array!");
-if(target == nullptr)
-target = this;
-if(!isSameShape(target) && !(rankOf() == 1 && target->rankOf() == 2 && sizeAt(0) == target->sizeAt(0) && sizeAt(0) == target->sizeAt(1)))
+if(!isSameShape(target) && !(rankOf() == 1 && target.rankOf() == 2 && sizeAt(0) == target.sizeAt(0) && sizeAt(0) == target.sizeAt(1)))
 throw std::string("NDArray::fillArrayAsTriangular method: wrong shape of target array !");
 if (direction == 'u')
-lower = -target->sizeAt(-2);
+lower = -target.sizeAt(-2);
 else if (direction == 'l')
-upper = target->sizeAt(-1);
+upper = target.sizeAt(-1);
 const T value = static_cast<T>(val);
 const auto x = reinterpret_cast<const T*>(getBuffer());
-auto z = reinterpret_cast<T*>(target->getBuffer());
+auto z = reinterpret_cast<T*>(target.getBuffer());
 const int xRank = rankOf();
-const int zRank = target->rankOf();
+const int zRank = target.rankOf();
-const auto zLen = target->lengthOf();
+const auto zLen = target.lengthOf();
-const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target->getShapeInfo());
+const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target.getShapeInfo());
 auto func = PRAGMA_THREADS_FOR {
 Nd4jLong coords[MAX_RANK];
 for (auto i = start; i < stop; i += increment) {
-shape::index2coords(i, target->getShapeInfo(), coords);
-const auto zOffset = shape::getOffset(target->getShapeInfo(), coords);
+shape::index2coords(i, target.getShapeInfo(), coords);
+const auto zOffset = shape::getOffset(target.getShapeInfo(), coords);
 // if( (row + upper < col) || (row + lower > col) )
 if ((coords[zRank - 2] + upper < coords[zRank - 1]) || (coords[zRank - 2] + lower > coords[zRank - 1]))
 z[zOffset] = value;
-else if (this != target) { // when this and target are different arrays
+else if (this != &target) { // when this and target are different arrays
 if (xRank != zRank)
 coords[0] = coords[1];

@@ -120,7 +117,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, const char
 samediff::Threads::parallel_for(func, 0, zLen);
 }
-BUILD_SINGLE_TEMPLATE(template void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES);
+BUILD_SINGLE_TEMPLATE(template void NDArray::fillAsTriangular, (const float val, int lower, int upper, NDArray& target, const char direction), LIBND4J_TYPES);
 ////////////////////////////////////////////////////////////////////////
 void NDArray::setIdentity() {

@@ -405,11 +402,11 @@ static void repeat_(const NDArray& input, NDArray& output, const std::vector<int
 //////////////////////////////////////////////////////////////////////////
 // create new array by repeating it the number of times given by repeats
-NDArray* NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
+NDArray NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
-auto output = new NDArray('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
+NDArray output('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
-BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeat_, (*this, *output, repeats, axis), LIBND4J_TYPES);
+BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeat_, (*this, output, repeats, axis), LIBND4J_TYPES);
 return output;
 }
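For the two CPU-side changes above, a hedged sketch of the new call style: fillAsTriangular takes its target by reference (direction is now the trailing parameter), and repeat returns the result by value, so there is no raw pointer to delete. NDArrayFactory::create, the shapes and the values are assumed for illustration only.

    auto src    = NDArrayFactory::create<float>('c', {3, 3});
    auto target = NDArrayFactory::create<float>('c', {3, 3});

    // old: src.fillAsTriangular<float>(1.f, 0, 0, 'u', &target);
    src.fillAsTriangular<float>(1.f, 0, 0, target, 'u');

    // old: NDArray* r = src.repeat(0, {2}); ... delete r;
    NDArray r = src.repeat(0, {2});   // returned by value, nothing to delete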
@@ -2,35 +2,24 @@
 template<typename T>
-void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<T(T, T, T)>& func, NDArray* target) {
-if (target == nullptr)
-target = this;
+void NDArray::applyTriplewiseLambda(NDArray& second, NDArray& third, const std::function<T(T, T, T)>& func, NDArray& target) {
-if (second == nullptr) {
-nd4j_printf("applyTriplewiseLambda requires three operands to be valid NDArrays, but Second is NULL\n","");
-throw std::runtime_error("second is null");
-}
-if (third == nullptr) {
-nd4j_printf("applyTriplewiseLambda requires three operands to be valid NDArrays, but Third is NULL\n","");
-throw std::runtime_error("third is null");
-}
 if(dataType() != DataTypeUtils::fromT<T>())
 throw std::runtime_error("NDArray::applyTriplewiseLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
-if(dataType() != second->dataType() || dataType() != third->dataType() || dataType() != target->dataType())
+if(dataType() != second.dataType() || dataType() != third.dataType() || dataType() != target.dataType())
 throw std::runtime_error("NDArray::applyTriplewiseLambda<T> method: bother four arrays (this, second, third, target) should have the same type !");
-if (this->lengthOf() != second->lengthOf() || this->lengthOf() != third->lengthOf() || !this->isSameShape(second) || !this->isSameShape(third)) {
+if (this->lengthOf() != second.lengthOf() || this->lengthOf() != third.lengthOf() || !this->isSameShape(second) || !this->isSameShape(third)) {
 nd4j_printf("applyPairwiseLambda requires both operands to have the same shape\n","");
 throw std::runtime_error("Shapes mismach");
 }
 auto f = this->bufferAsT<T>();
-auto s = second->bufferAsT<T>();
-auto t = third->bufferAsT<T>();
-auto z = target->bufferAsT<T>();
+auto s = second.bufferAsT<T>();
+auto t = third.bufferAsT<T>();
+auto z = target.bufferAsT<T>();
-if (this->ordering() == second->ordering() && this->ordering() == third->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == second->ews() && this->ews() == third->ews()) {
+if (this->ordering() == second.ordering() && this->ordering() == third.ordering() && this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1) && this->ews() == second.ews() && this->ews() == third.ews()) {
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment)

@@ -44,8 +33,8 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto tOffset = this->getOffset(e);
-auto uOffset = second->getOffset(e);
-auto vOffset = third->getOffset(e);
+auto uOffset = second.getOffset(e);
+auto vOffset = third.getOffset(e);
 f[tOffset] = func(f[tOffset], s[uOffset], t[vOffset]);
 }

@@ -57,9 +46,9 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto tOffset = this->getOffset(e);
-auto uOffset = second->getOffset(e);
-auto vOffset = third->getOffset(e);
-auto zOffset = target->getOffset(e);
+auto uOffset = second.getOffset(e);
+auto vOffset = third.getOffset(e);
+auto zOffset = target.getOffset(e);
 z[zOffset] = func(f[tOffset], s[uOffset], t[vOffset]);
 }

@@ -69,46 +58,39 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
 }
 }
 }
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<double (double, double, double)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<float (float, float, float)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<float16 (float16, float16, float16)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<bfloat16 (bfloat16, bfloat16, bfloat16)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<int (int, int, int)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<int16_t (int16_t, int16_t, int16_t)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint8_t (uint8_t, uint8_t, uint8_t)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint16_t (uint16_t, uint16_t, uint16_t)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint32_t (uint32_t, uint32_t, uint32_t)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint64_t (uint64_t, uint64_t, uint64_t)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<int8_t (int8_t, int8_t, int8_t)>& func, NDArray* target);
-template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<bool (bool, bool, bool)>& func, NDArray* target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<double (double, double, double)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<float (float, float, float)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<float16 (float16, float16, float16)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<bfloat16 (bfloat16, bfloat16, bfloat16)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<int (int, int, int)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<int16_t (int16_t, int16_t, int16_t)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint8_t (uint8_t, uint8_t, uint8_t)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint16_t (uint16_t, uint16_t, uint16_t)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint32_t (uint32_t, uint32_t, uint32_t)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint64_t (uint64_t, uint64_t, uint64_t)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<int8_t (int8_t, int8_t, int8_t)>& func, NDArray& target);
+template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<bool (bool, bool, bool)>& func, NDArray& target);
 //////////////////////////////////////////////////////////////////////////
 template<typename T>
-void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T, T)>& func, NDArray* target) {
-if (target == nullptr)
-target = this;
-if (other == nullptr) {
-nd4j_printf("applyPairwiseLambda requires both operands to be valid NDArrays, but Y is NULL\n","");
-throw std::runtime_error("Other is null");
-}
+void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<T(T, T)>& func, NDArray& target) {
 if(dataType() != DataTypeUtils::fromT<T>())
 throw std::runtime_error("NDArray::applyPairwiseLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
-if(dataType() != other->dataType() || dataType() != target->dataType())
+if(dataType() != other.dataType() || dataType() != target.dataType())
 throw std::runtime_error("NDArray::applyPairwiseLambda<T> method: all three arrays (this, other, target) must have the same type !");
-if (this->lengthOf() != other->lengthOf()) {
+if (this->lengthOf() != other.lengthOf()) {
 nd4j_printf("applyPairwiseLambda requires both operands to have the same shape\n","");
 throw std::runtime_error("Shapes mismach");
 }
 auto f = this->bufferAsT<T>();
-auto s = other->bufferAsT<T>();
-auto z = target->bufferAsT<T>();
+auto s = other.bufferAsT<T>();
+auto z = target.bufferAsT<T>();
-if (this->ordering() == other->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == other->ews()) {
+if (this->ordering() == other.ordering() && this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1) && this->ews() == other.ews()) {
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment)

@@ -122,7 +104,7 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto xOffset = this->getOffset(e);
-auto yOffset = other->getOffset(e);
+auto yOffset = other.getOffset(e);
 f[xOffset] = func(f[xOffset], s[yOffset]);
 }

@@ -134,8 +116,8 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto xOffset = this->getOffset(e);
-auto yOffset = other->getOffset(e);
-auto zOffset = target->getOffset(e);
+auto yOffset = other.getOffset(e);
+auto zOffset = target.getOffset(e);
 z[zOffset] = func(f[xOffset], s[yOffset]);
 }

@@ -145,35 +127,33 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
 }
 }
 }
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<double (double, double)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<float (float, float)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<float16 (float16, float16)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<bfloat16 (bfloat16, bfloat16)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<int (int, int)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<int16_t (int16_t, int16_t)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint8_t (uint8_t, uint8_t)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint16_t (uint16_t, uint16_t)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint32_t (uint32_t, uint32_t)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint64_t (uint64_t, uint64_t)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<int8_t (int8_t, int8_t)>& func, NDArray* target);
-template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<bool (bool, bool)>& func, NDArray* target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<double (double, double)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<float (float, float)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<float16 (float16, float16)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<bfloat16 (bfloat16, bfloat16)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<int (int, int)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<int16_t (int16_t, int16_t)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint8_t (uint8_t, uint8_t)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint16_t (uint16_t, uint16_t)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint32_t (uint32_t, uint32_t)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint64_t (uint64_t, uint64_t)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<int8_t (int8_t, int8_t)>& func, NDArray& target);
+template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<bool (bool, bool)>& func, NDArray& target);
 //////////////////////////////////////////////////////////////////////////
 template<typename T>
-void NDArray::applyLambda(const std::function<T(T)>& func, NDArray* target) {
-if (target == nullptr)
-target = this;
+void NDArray::applyLambda(const std::function<T(T)>& func, NDArray& target) {
 if(dataType() != DataTypeUtils::fromT<T>())
 throw std::runtime_error("NDArray::applyLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
-if(dataType() != target->dataType())
+if(dataType() != target.dataType())
 throw std::runtime_error("NDArray::applyLambda<T> method: types of this and target array should match !");
 auto f = this->bufferAsT<T>();
-auto z = target->bufferAsT<T>();
+auto z = target.bufferAsT<T>();
-if (this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1)) {
+if (this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1)) {
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment)

@@ -198,7 +178,7 @@ void NDArray::applyLambda(const std::function<T(T)>& func, NDArray* target) {
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto xOffset = this->getOffset(e);
-auto zOffset = target->getOffset(e);
+auto zOffset = target.getOffset(e);
 z[zOffset] = func(f[xOffset]);
 }

@@ -208,35 +188,33 @@ void NDArray::applyLambda(const std::function<T(T)>& func, NDArray* target) {
 }
 }
 }
-template void NDArray::applyLambda(const std::function<double(double)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<float(float)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<float16(float16)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<bfloat16(bfloat16)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<Nd4jLong(Nd4jLong)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<int16_t(int16_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<int32_t(int32_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<uint8_t(uint8_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<uint16_t(uint16_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<uint32_t(uint32_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<uint64_t(uint64_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<int8_t(int8_t)>& func, NDArray* target);
-template void NDArray::applyLambda(const std::function<bool(bool)>& func, NDArray* target);
+template void NDArray::applyLambda(const std::function<double(double)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<float(float)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<float16(float16)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<bfloat16(bfloat16)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<Nd4jLong(Nd4jLong)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<int16_t(int16_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<int32_t(int32_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<uint8_t(uint8_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<uint16_t(uint16_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<uint32_t(uint32_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<uint64_t(uint64_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<int8_t(int8_t)>& func, NDArray& target);
+template void NDArray::applyLambda(const std::function<bool(bool)>& func, NDArray& target);
 //////////////////////////////////////////////////////////////////////////
 template<typename T>
-void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDArray* target) {
-if (target == nullptr)
-target = this;
+void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDArray& target) {
 if(dataType() != DataTypeUtils::fromT<T>())
 throw std::runtime_error("NDArray::applyIndexedLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
-if(dataType() != target->dataType())
+if(dataType() != target.dataType())
 throw std::runtime_error("NDArray::applyIndexedLambda<T> method: types of this and target array should match !");
 auto f = this->bufferAsT<T>();
-auto z = target->bufferAsT<T>();
+auto z = target.bufferAsT<T>();
-if (this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1)) {
+if (this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1)) {
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment)

@@ -261,7 +239,7 @@ void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDAr
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto xOffset = this->getOffset(e);
-auto zOffset = target->getOffset(e);
+auto zOffset = target.getOffset(e);
 z[zOffset] = func(e, f[xOffset]);
 }

@@ -271,44 +249,38 @@ void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDAr
 }
 }
 }
-template void NDArray::applyIndexedLambda(const std::function<double(Nd4jLong, double)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<float(Nd4jLong, float)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<float16(Nd4jLong, float16)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<bfloat16(Nd4jLong, bfloat16)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<Nd4jLong(Nd4jLong, Nd4jLong)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<int(Nd4jLong, int)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<int16_t(Nd4jLong, int16_t)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<uint8_t (Nd4jLong, uint8_t)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<uint16_t (Nd4jLong, uint16_t)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<uint32_t (Nd4jLong, uint32_t)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<uint64_t (Nd4jLong, uint64_t)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<int8_t(Nd4jLong, int8_t)>& func, NDArray* target);
-template void NDArray::applyIndexedLambda(const std::function<bool(Nd4jLong, bool)>& func, NDArray* target);
+template void NDArray::applyIndexedLambda(const std::function<double(Nd4jLong, double)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<float(Nd4jLong, float)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<float16(Nd4jLong, float16)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<bfloat16(Nd4jLong, bfloat16)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<Nd4jLong(Nd4jLong, Nd4jLong)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<int(Nd4jLong, int)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<int16_t(Nd4jLong, int16_t)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<uint8_t (Nd4jLong, uint8_t)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<uint16_t (Nd4jLong, uint16_t)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<uint32_t (Nd4jLong, uint32_t)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<uint64_t (Nd4jLong, uint64_t)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<int8_t(Nd4jLong, int8_t)>& func, NDArray& target);
+template void NDArray::applyIndexedLambda(const std::function<bool(Nd4jLong, bool)>& func, NDArray& target);
 //////////////////////////////////////////////////////////////////////////
 template<typename T>
-void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(Nd4jLong, T, T)>& func, NDArray* target) {
-if (target == nullptr)
-target = this;
+void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<T(Nd4jLong, T, T)>& func, NDArray& target) {
-if (other == nullptr) {
-nd4j_printf("applyIndexedPairwiseLambda requires both operands to be valid NDArrays, but Y is NULL\n","");
-throw std::runtime_error("Other is null");
-}
 if(dataType() != DataTypeUtils::fromT<T>())
 throw std::runtime_error("NDArray::applyIndexedPairwiseLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
-if(dataType() != target->dataType())
+if(dataType() != target.dataType())
 throw std::runtime_error("NDArray::applyIndexedPairwiseLambda<T> method: types of this and target array should match !");
-if (this->lengthOf() != other->lengthOf()) {
+if (this->lengthOf() != other.lengthOf()) {
 nd4j_printf("applyIndexedPairwiseLambda requires both operands to have the same shape\n","");
 throw std::runtime_error("Shapes mismach");
 }
 auto f = this->bufferAsT<T>();
-auto s = other->bufferAsT<T>();
-auto z = target->bufferAsT<T>();
+auto s = other.bufferAsT<T>();
+auto z = target.bufferAsT<T>();
-if (this->ordering() == other->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == other->ews()) {
+if (this->ordering() == other.ordering() && this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1) && this->ews() == other.ews()) {
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment)

@@ -322,7 +294,7 @@ void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(N
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto xOffset = this->getOffset(e);
-auto yOffset = other->getOffset(e);
+auto yOffset = other.getOffset(e);
 f[xOffset] = func((Nd4jLong) e, f[xOffset], s[yOffset]);
 }

@@ -334,8 +306,8 @@ void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(N
 auto loop = PRAGMA_THREADS_FOR {
 for (auto e = start; e < stop; e += increment) {
 auto xOffset = this->getOffset(e);
-auto yOffset = other->getOffset(e);
-auto zOffset = target->getOffset(e);
+auto yOffset = other.getOffset(e);
+auto zOffset = target.getOffset(e);
 z[zOffset] = func((Nd4jLong) e, f[xOffset], s[yOffset]);
 }

@@ -345,16 +317,16 @@ void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(N
 }
 }
 }
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<double (Nd4jLong, double, double)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<float (Nd4jLong, float, float)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<float16 (Nd4jLong, float16, float16)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<bfloat16 (Nd4jLong, bfloat16, bfloat16)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<int (Nd4jLong, int, int)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<int16_t (Nd4jLong, int16_t, int16_t)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint8_t (Nd4jLong, uint8_t, uint8_t)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint16_t (Nd4jLong, uint16_t, uint16_t)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint32_t (Nd4jLong, uint32_t, uint32_t)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint64_t (Nd4jLong, uint64_t, uint64_t)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<int8_t (Nd4jLong, int8_t, int8_t)>& func, NDArray* target);
-template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<bool (Nd4jLong, bool, bool)>& func, NDArray* target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<double (Nd4jLong, double, double)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<float (Nd4jLong, float, float)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<float16 (Nd4jLong, float16, float16)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<bfloat16 (Nd4jLong, bfloat16, bfloat16)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<int (Nd4jLong, int, int)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<int16_t (Nd4jLong, int16_t, int16_t)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint8_t (Nd4jLong, uint8_t, uint8_t)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint16_t (Nd4jLong, uint16_t, uint16_t)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint32_t (Nd4jLong, uint32_t, uint32_t)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint64_t (Nd4jLong, uint64_t, uint64_t)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<int8_t (Nd4jLong, int8_t, int8_t)>& func, NDArray& target);
+template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<bool (Nd4jLong, bool, bool)>& func, NDArray& target);
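A hedged example of the new lambda-method call style on the CPU backend: operands and the target are passed by reference (passing the array itself as target replaces the old nullptr-means-in-place convention), and the template argument must still match the array's data type. NDArrayFactory::create and the values are assumed for illustration.

    auto x = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
    auto y = NDArrayFactory::create<float>('c', {2, 2}, {10.f, 20.f, 30.f, 40.f});
    auto z = NDArrayFactory::create<float>('c', {2, 2});

    // old: x.applyPairwiseLambda(&y, func, &z);
    x.applyPairwiseLambda<float>(y, [](float a, float b) { return a + b; }, z);

    // in-place update is now written explicitly by passing x as its own target
    x.applyLambda<float>([](float a) { return 2.f * a; }, x);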
@@ -2717,25 +2717,25 @@ static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSub
 switch (opCode) {
 case 0:
-inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::Add, updSubArr, inSubArr);
 break;
 case 1:
-inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::Subtract, updSubArr, inSubArr);
 break;
 case 2:
-inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::Multiply, updSubArr, inSubArr);
 break;
 case 3:
-inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::Divide, updSubArr, inSubArr);
 break;
 case 4:
-inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, updSubArr, inSubArr);
 break;
 case 5:
-inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, updSubArr, inSubArr);
 break;
 case 6:
-inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr);
+inSubArr.applyPairwiseTransform(pairwise::CopyPws, updSubArr, inSubArr);
 break;
 default:
 continue;
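The scatter-update switch above uses the reworked applyPairwiseTransform: the other operand and the output are references and the trailing extra-arguments pointer is gone. A hedged, standalone version of the same accumulate pattern (array contents are illustrative only):

    auto in  = NDArrayFactory::create<float>('c', {4}, {1.f, 2.f, 3.f, 4.f});
    auto upd = NDArrayFactory::create<float>('c', {4}, {10.f, 10.f, 10.f, 10.f});

    // old: in.applyPairwiseTransform(pairwise::Add, &upd, &in, nullptr);
    in.applyPairwiseTransform(pairwise::Add, upd, in);   // element-wise in += upd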
@@ -122,35 +122,32 @@ __global__ static void fillAsTriangularCuda(const void* vx, const Nd4jLong* xSha
 ///////////////////////////////////////////////////////////////////
 template<typename T>
-void NDArray::fillAsTriangular(const float val, int lower, int upper, const char direction, NDArray* target) {
+void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& target, const char direction) {
 if (isS())
 throw std::runtime_error("NDArray::fillAsTriangular: you can't use this method on String array!");
-if(target == nullptr)
-target = this;
-if(!isSameShape(target) && !(rankOf() == 1 && target->rankOf() == 2 && sizeAt(0) == target->sizeAt(0) && sizeAt(0) == target->sizeAt(1)))
+if(!isSameShape(target) && !(rankOf() == 1 && target.rankOf() == 2 && sizeAt(0) == target.sizeAt(0) && sizeAt(0) == target.sizeAt(1)))
 throw std::string("NDArray::fillAsTriangular method: wrong shape of target array !");
 if (direction == 'u')
-lower = -target->sizeAt(-2);
+lower = -target.sizeAt(-2);
 else if (direction == 'l')
-upper = target->sizeAt(-1);
+upper = target.sizeAt(-1);
 const int threadsPerBlock = MAX_NUM_THREADS / 4;
-const int blocksPerGrid = (target->lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
-const int sharedMem = threadsPerBlock * sizeof(decltype(*target->getShapeInfo())) * target->rankOf() + 128;
+const int blocksPerGrid = (target.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
+const int sharedMem = threadsPerBlock * sizeof(decltype(*target.getShapeInfo())) * target.rankOf() + 128;
 PointersManager manager(getContext(), "NDArray::fillAsTriangular");
-NDArray::prepareSpecialUse({target}, {this});
-fillAsTriangularCuda<T><<<blocksPerGrid, threadsPerBlock, sharedMem, *getContext()->getCudaStream()>>>(getPlatformBuffer(), getPlatformShapeInfo(), target->getPlatformBuffer(), target->getPlatformShapeInfo(), static_cast<T>(val), lower, upper);
-NDArray::registerSpecialUse({target}, {this});
+NDArray::prepareSpecialUse({&target}, {this});
+fillAsTriangularCuda<T><<<blocksPerGrid, threadsPerBlock, sharedMem, *getContext()->getCudaStream()>>>(getPlatformBuffer(), getPlatformShapeInfo(), target.getPlatformBuffer(), target.getPlatformShapeInfo(), static_cast<T>(val), lower, upper);
+NDArray::registerSpecialUse({&target}, {this});
 manager.synchronize();
 }
-BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES);
+BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::fillAsTriangular, (const float val, int lower, int upper, NDArray& target, const char direction), LIBND4J_TYPES);
 ////////////////////////////////////////////////////////////////////////
 template<typename T>
@@ -457,21 +454,21 @@ BUILD_DOUBLE_TEMPLATE(template void repeatCudaLauncher, (const int blocksPerGrid
 //////////////////////////////////////////////////////////////////////////
 // create new array by repeating it the number of times given by repeats
-NDArray* NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
+NDArray NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
-auto output = new NDArray('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
+NDArray output('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
 const int threadsPerBlock = MAX_NUM_THREADS / 2;
-const int blocksPerGrid = (output->lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
-const int sharedMem = output->rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128;
+const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
+const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128;
 PointersManager manager(getContext(), "NDArray::repeat(const int axis, const std::vector<int>& repeats)");
 const int* reps = reinterpret_cast<int*>(manager.replicatePointer(repeats.data(), repeats.size() * sizeof(int)));
-prepareSpecialUse({output}, {this});
-BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES);
-prepareSpecialUse({output}, {this});
+prepareSpecialUse({&output}, {this});
+BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES);
+prepareSpecialUse({&output}, {this});
 manager.synchronize();
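Returning output by value from both repeat implementations leans on the NDArray move support added in this PR; per the commit notes, view-like arrays are the exception and keep their own resources rather than being moved from. A small hedged sketch of what that buys at the call site:

    auto a = NDArrayFactory::create<float>('c', {2, 3});

    NDArray b = std::move(a);      // b takes over a's buffers (a is a plain array here, not a view)
    NDArray r = b.repeat(0, {2});  // the array built inside repeat() is moved/elided, not deep-copied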
@@ -247,73 +247,73 @@ static _CUDA_G void lambdaTriplewiseKernel(void* vw, Nd4jLong *wShapeInfo, void*
 //////////////////////////////////////////////////////////////////////////
 template<typename Lambda>
-void NDArray::applyLambda(Lambda func, NDArray* target) {
-auto result = target == nullptr ? this : target;
+void NDArray::applyLambda(Lambda func, NDArray& target) {
 auto dtype = this->dataType();
-if (dtype != result->dataType())
+if (dtype != target.dataType())
 throw std::runtime_error("NDArray::applyLambda X/Z data types must be the same");
-//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, result->dataType());
-prepareSpecialUse({result}, {this});
-BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
-registerSpecialUse({result}, {this});
+//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, target.dataType());
+prepareSpecialUse({&target}, {this});
+BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
+registerSpecialUse({&target}, {this});
 }
 //////////////////////////////////////////////////////////////////////////
 template<typename Lambda>
-void NDArray::applyPairwiseLambda(const NDArray* other, Lambda func, NDArray* target) {
-auto result = target == nullptr ? this : target;
+void NDArray::applyPairwiseLambda(const NDArray& other, Lambda func, NDArray& target) {
 auto dtype = this->dataType();
-if (dtype != result->dataType() || dtype != other->dataType())
+if (dtype != target.dataType() || dtype != other.dataType())
 throw std::runtime_error("NDArray::applyPairwiseLambda X/Y/Z data types must be the same");
-//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, result->dataType());
+//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, target.dataType());
-prepareSpecialUse({result}, {this, other});
-BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other->getSpecialBuffer(), other->getSpecialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
-registerSpecialUse({result}, {this, other});
+prepareSpecialUse({&target}, {this, &other});
+BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
+registerSpecialUse({&target}, {this, &other});
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename Lambda>
-void NDArray::applyIndexedLambda(Lambda func, NDArray* target) {
-auto result = target == nullptr ? this : target;
+void NDArray::applyIndexedLambda(Lambda func, NDArray& target) {
 auto dtype = this->dataType();
-if (dtype != result->dataType())
+if (dtype != target.dataType())
 throw std::runtime_error("NDArray::applyIndexedLambda X/Z data types must be the same");
-prepareSpecialUse({result}, {this});
-BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
-registerSpecialUse({result}, {this});
+prepareSpecialUse({&target}, {this});
+BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
+registerSpecialUse({&target}, {this});
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename Lambda>
-void NDArray::applyIndexedPairwiseLambda(NDArray* other, Lambda func, NDArray* target) {
-auto result = target == nullptr ? this : target;
+void NDArray::applyIndexedPairwiseLambda(NDArray& other, Lambda func, NDArray& target) {
 auto dtype = this->dataType();
-if (dtype != result->dataType() || dtype != other->dataType())
+if (dtype != target.dataType() || dtype != other.dataType())
 throw std::runtime_error("NDArray::applyIndexedPairwiseLambda X/Y/Z data types must be the same");
-prepareSpecialUse({result}, {this, other});
-BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other->getSpecialBuffer(), other->getSpecialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
-registerSpecialUse({result}, {this, other});
+prepareSpecialUse({&target}, {this, &other});
+BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
+registerSpecialUse({&target}, {this, &other});
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename Lambda>
-void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, Lambda func, NDArray* target) {
-auto result = target == nullptr ? this : target;
+void NDArray::applyTriplewiseLambda(NDArray& second, NDArray& third, Lambda func, NDArray& target) {
 auto dtype = this->dataType();
-if (dtype != result->dataType() || dtype != second->dataType() || dtype != third->dataType())
+if (dtype != target.dataType() || dtype != second.dataType() || dtype != third.dataType())
 throw std::runtime_error("NDArray::applyTriplewiseLambda X/Y/Z data types must be the same");
-prepareSpecialUse({result}, {this, second, third});
-BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaTriplewiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), second->specialBuffer(), second->specialShapeInfo(), third->specialBuffer(), third->specialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
-registerSpecialUse({result}, {this, second, third});
+prepareSpecialUse({&target}, {this, &second, &third});
+BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaTriplewiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), second.specialBuffer(), second.specialShapeInfo(), third.specialBuffer(), third.specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
+registerSpecialUse({&target}, {this, &second, &third});
 }
@@ -91,6 +91,10 @@ namespace nd4j {
 template <typename T>
 FORCEINLINE static bool castShapeInfo(const Nd4jLong *originalShapeInfo, T *newShapeInfo);
+template<typename T>
+// struct scalarTypesForNDarray { static bool const value = std::is_same<double, T>::value || std::is_same<float, T>::value || std::is_same<int, T>::value || std::is_same<bfloat16, T>::value || std::is_same<float16, T>::value || std::is_same<long long, T>::value; };
+struct scalarTypesForNDarray { static bool const value = std::is_same<double, T>::value || std::is_same<float, T>::value || std::is_same<int, T>::value || std::is_same<unsigned int, T>::value || std::is_same<long long, T>::value || std::is_same<unsigned long long, T>::value || std::is_same<long int, T>::value || std::is_same<long unsigned int, T>::value || std::is_same<int8_t, T>::value || std::is_same<uint8_t, T>::value || std::is_same<int16_t, T>::value || std::is_same<uint16_t, T>::value || std::is_same<bool, T>::value || std::is_same<bfloat16, T>::value || std::is_same<float16, T>::value; };
 };
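The widened scalarTypesForNDarray trait above is what the scalar arithmetic operators use to restrict their template parameter to the supported numeric types. A scaled-down, hypothetical sketch of the same enable_if gating pattern (the names below are invented for illustration, not the real declarations):

    #include <type_traits>

    // hypothetical miniature of the trait
    template <typename T>
    struct is_ndarray_scalar {
        static const bool value = std::is_same<T, double>::value || std::is_same<T, float>::value ||
                                  std::is_same<T, int>::value    || std::is_same<T, long long>::value;
    };

    // the operator template only participates in overload resolution for whitelisted scalars
    template <typename T, typename std::enable_if<is_ndarray_scalar<T>::value, int>::type = 0>
    NDArray operator+(const NDArray& arr, const T scalar);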
@ -44,7 +44,7 @@ namespace nd4j {
|
|||
}
|
||||
|
||||
NDArray* NDArrayList::read(int idx) {
|
||||
return readRaw(idx)->dup();
|
||||
return new NDArray(readRaw(idx)->dup());
|
||||
}
|
||||
|
||||
nd4j::DataType NDArrayList::dataType() {
|
||||
|
@@ -136,11 +136,10 @@ namespace nd4j {
std::vector<int> args({axis});
auto newAxis = ShapeUtils::evalDimsToExclude(array->rankOf(), args);
auto result = array->allTensorsAlongDimension(newAxis);
for (int e = 0; e < result->size(); e++) {
auto chunk = result->at(e);//->dup(array->ordering());
write(e, chunk->dup(array->ordering()));
for (int e = 0; e < result.size(); e++) {
auto chunk = result.at(e);//->dup(array->ordering());
write(e, new NDArray(chunk->dup(array->ordering())));
}
delete result;
}
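allTensorsAlongDimension now seems to hand back its result set by value, which is why the delete result; line is gone and element access switches from -> to dot. A toy sketch of that pointer-to-value container migration (ResultSet here is just a typedef, not the nd4j class):

    // Returning a result set by value instead of as an owning pointer: the
    // caller iterates it and never calls delete.
    #include <algorithm>
    #include <cstddef>
    #include <vector>

    using Chunk = std::vector<float>;
    using ResultSet = std::vector<Chunk>;   // stand-in for the real ResultSet

    ResultSet splitIntoChunks(const std::vector<float>& data, std::size_t chunkLen) {
        ResultSet out;
        for (std::size_t i = 0; i < data.size(); i += chunkLen)
            out.emplace_back(data.begin() + i,
                             data.begin() + std::min(i + chunkLen, data.size()));
        return out;   // moved out, no heap ownership to track
    }

    int main() {
        auto result = splitIntoChunks({1, 2, 3, 4, 5, 6}, 2);
        // old style: result->size(), result->at(e), then delete result;
        // new style: result.size(), result.at(e), nothing to delete.
        return result.size() == 3 && result.at(2).size() == 2 ? 0 : 1;
    }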
NDArray* NDArrayList::stack() {
|
||||
|
@ -161,7 +160,7 @@ namespace nd4j {
|
|||
|
||||
auto result = op.execute(inputs, {}, {}, {});
|
||||
|
||||
auto array = result->at(0)->dup();
|
||||
auto array = new NDArray(result->at(0)->dup());
|
||||
|
||||
delete result;
|
||||
|
||||
|
@ -214,13 +213,11 @@ namespace nd4j {
|
|||
auto tads = array->allTensorsAlongDimension(axis);
|
||||
int indicesSize = indices.size();
|
||||
|
||||
if (tads->size() != indicesSize)
|
||||
if (tads.size() != indicesSize)
|
||||
throw std::runtime_error("Number of TADs should match number of indices");
|
||||
|
||||
for (int e = 0; e < indicesSize; e++)
|
||||
tads->at(e)->assign(_chunks[indices[e]]);
|
||||
|
||||
delete tads;
|
||||
tads.at(e)->assign(_chunks[indices[e]]);
|
||||
|
||||
return array;
|
||||
}
|
||||
|
@ -234,7 +231,7 @@ namespace nd4j {
|
|||
list->_elements.store(_elements.load());
|
||||
|
||||
for (auto const& v : _chunks) {
|
||||
list->_chunks[v.first] = v.second->dup();
|
||||
list->_chunks[v.first] = new NDArray(v.second->dup());
|
||||
}
|
||||
|
||||
return list;
|
||||
|
|
|
@ -48,7 +48,7 @@ namespace nd4j {
|
|||
} else {
|
||||
// FIXME: in some cases it's possible to have no NDArray
|
||||
if (inputVar->hasNDArray())
|
||||
innerVar->setNDArray(inputVar->getNDArray()->dup());
|
||||
innerVar->setNDArray(new NDArray(inputVar->getNDArray()->dup()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ namespace nd4j {
|
|||
} else {
|
||||
// FIXME: in some cases it's possible to have no NDArray
|
||||
if (inputVar->hasNDArray())
|
||||
innerVar->setNDArray(inputVar->getNDArray()->dup());
|
||||
innerVar->setNDArray(new NDArray(inputVar->getNDArray()->dup()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ namespace nd4j {
|
|||
result->setIndex(this->_index);
|
||||
|
||||
if (this->_ndarray != nullptr)
|
||||
result->setNDArray(this->_ndarray->template asT<N>());
|
||||
result->setNDArray(new NDArray(this->_ndarray->template asT<N>()));
|
||||
|
||||
// FIXME: add support for ArrayList
|
||||
if (this->_list != nullptr) {
|
||||
|
@ -61,7 +61,7 @@ namespace nd4j {
|
|||
result->_index = this->_index;
|
||||
|
||||
if (this->_ndarray != nullptr)
|
||||
result->_ndarray = this->_ndarray->dup(this->_ndarray->ordering());
|
||||
result->_ndarray = new NDArray(this->_ndarray->dup(this->_ndarray->ordering()));
|
||||
|
||||
if (this->_list != nullptr)
|
||||
result->_list = this->_list->clone();
|
||||
|
|
|
@ -93,7 +93,7 @@ namespace nd4j {
|
|||
}
|
||||
|
||||
OpBenchmark* clone() override {
|
||||
return new ScalarBenchmark((scalar::Ops) _opNum, _testName, _x == nullptr ? _x : _x->dup() , _y == nullptr ? _y : _y->dup(), _z == nullptr ? _z : _z->dup());
|
||||
return new ScalarBenchmark((scalar::Ops) _opNum, _testName, _x == nullptr ? _x : new NDArray(_x->dup()) , _y == nullptr ? _y : new NDArray(_y->dup()), _z == nullptr ? _z : new NDArray(_z->dup()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@@ -230,17 +230,17 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, con
bool cNcont = N == 1 || C->strideAt(1) == 1;

if(!aMcont && !aKcont) {
pA = A->dup('f');
pA = new NDArray(A->dup('f'));
toDelete.push_back(pA);
aMcont = true;
}
if(!bKcont && !bNcont) {
pB = B->dup('f');
pB = new NDArray(B->dup('f'));
toDelete.push_back(pB);
bKcont = true;
}
if(!cMcont && !cNcont) {
pC = C->dup('f');
pC = new NDArray(C->dup('f'));
toDelete.push_back(pC);
cMcont = true;
}
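mmulMxM only hands BLAS a matrix whose leading dimension is contiguous; when neither stride is 1 it makes an 'f'-ordered duplicate and tracks it for later cleanup. A self-contained sketch of that contiguity check and fallback copy, using a toy Matrix with explicit strides (the real code keeps raw pointers in toDelete; unique_ptr is used here only to keep the sketch leak-free):

    #include <memory>
    #include <vector>

    struct Matrix {
        int rows, cols;
        int rowStride, colStride;          // element strides
        std::vector<double> data;

        double at(int i, int j) const { return data[i * rowStride + j * colStride]; }
    };

    // Copy into dense column-major layout (rowStride == 1), like dup('f').
    Matrix dupColumnMajor(const Matrix& m) {
        Matrix out{m.rows, m.cols, 1, m.rows, std::vector<double>(m.rows * m.cols)};
        for (int j = 0; j < m.cols; ++j)
            for (int i = 0; i < m.rows; ++i)
                out.data[i + j * m.rows] = m.at(i, j);
        return out;
    }

    const Matrix* prepareForGemm(const Matrix& a, std::vector<std::unique_ptr<Matrix>>& temporaries) {
        const bool mContiguous = a.rows == 1 || a.rowStride == 1;
        const bool kContiguous = a.cols == 1 || a.colStride == 1;
        if (!mContiguous && !kContiguous) {
            temporaries.emplace_back(new Matrix(dupColumnMajor(a)));  // mirrors toDelete.push_back(pA)
            return temporaries.back().get();
        }
        return &a;   // already usable as-is
    }

    int main() {
        // a 2x2 "view" with non-unit strides in both dimensions
        Matrix view{2, 2, 4, 2, std::vector<double>(8, 1.0)};
        std::vector<std::unique_ptr<Matrix>> temporaries;
        const Matrix* pA = prepareForGemm(view, temporaries);
        return (pA != &view && pA->rowStride == 1) ? 0 : 1;
    }

The design point mirrored here is that the copy is made only on the slow path, so contiguous inputs still go straight to gemm with zero extra allocations.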
@ -332,7 +332,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, nd4j::NDArray*
|
|||
bool aNcont = N == 1 || A->strideAt(1) == 1;
|
||||
|
||||
if(!aMcont && !aNcont) {
|
||||
pA = A->dup('f');
|
||||
pA = new NDArray(A->dup('f'));
|
||||
aMcont = true;
|
||||
}
|
||||
const CBLAS_ORDER blasOrder = aMcont ? CblasColMajor : CblasRowMajor;
|
||||
|
|
|
@ -60,11 +60,10 @@ NDArray Householder<T>::evalHHmatrix(const NDArray& x) {
|
|||
w.p(Nd4jLong(0), 1.f);
|
||||
wT.assign(&w);
|
||||
|
||||
auto identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext());
|
||||
NDArray identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext());
|
||||
identity.setIdentity(); // identity matrix
|
||||
|
||||
return identity - mmul(w, wT) * coeff;
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -95,9 +94,9 @@ void Householder<T>::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff,
coeff = -u0 / normX;

if(x.isRowVector())
tail.assign(x({0,0, 1,-1}) / u0);
tail.assign(static_cast<const NDArray&>(x({0,0, 1,-1})) / u0);
else
tail.assign(x({1,-1, 0,0,}) / u0);
tail.assign(static_cast<const NDArray&>(x({1,-1, 0,0,})) / u0);
}
}
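The static_cast<const NDArray&> above (and in the JacobiSVD hunks below) presumably steers overload resolution: x({...}) yields a temporary view, and with the new rvalue operator overloads an unqualified call could bind to NDArray&& and try to reuse a view's buffer, which view-like arrays must not allow. This rationale is inferred, not stated in the diff; the toy below only illustrates forcing the lvalue overload (Buf is not the real NDArray):

    #include <utility>
    #include <vector>

    struct Buf {
        std::vector<double> data;
        bool isView;   // assumption: views must never have their storage reused
    };

    // lvalue overload: always produces a fresh result
    Buf operator/(const Buf& x, double s) {
        Buf r{x.data, false};
        for (auto& v : r.data) v /= s;
        return r;
    }

    // rvalue overload: reuses the temporary's buffer, which a view must avoid
    Buf operator/(Buf&& x, double s) {
        for (auto& v : x.data) v /= s;
        return std::move(x);
    }

    Buf makeView(const Buf& src) { return Buf{src.data, true}; }   // stands in for x({...})

    int main() {
        Buf x{{2.0, 4.0}};
        // Unqualified, makeView(x) / 2.0 would select the rvalue overload.
        // The cast forces the const-lvalue overload instead:
        Buf tail = static_cast<const Buf&>(makeView(x)) / 2.0;
        return tail.data[0] == 1.0 ? 0 : 1;
    }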
@ -269,7 +269,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
|
|||
|
||||
HHcolPivQR qr(matrix / scale);
|
||||
_m.assign(qr._qr({0,_cols, 0,_cols}));
|
||||
_m.fillAsTriangular<T>(0., 0, 0, 'l');
|
||||
_m.fillAsTriangular<T>(0., 0, 0, _m, 'l');
|
||||
|
||||
HHsequence hhSeg(qr._qr, qr._coeffs, 'u');
|
||||
|
||||
|
@ -288,7 +288,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
|
|||
auto matrixT = matrix.transpose();
|
||||
HHcolPivQR qr(matrixT / scale);
|
||||
_m.assign(qr._qr({0,_rows, 0,_rows}));
|
||||
_m.fillAsTriangular<T>(0., 0, 0, 'l');
|
||||
_m.fillAsTriangular<T>(0., 0, 0, _m, 'l');
|
||||
_m.transposei();
|
||||
|
||||
HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here !
|
||||
|
@ -305,7 +305,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
|
|||
}
|
||||
else {
|
||||
|
||||
_m.assign(matrix({0,_diagSize, 0,_diagSize}) / scale);
|
||||
_m.assign(static_cast<const NDArray&>(matrix({0,_diagSize, 0,_diagSize})) / scale);
|
||||
|
||||
if(_calcU)
|
||||
_u.setIdentity();
|
||||
|
@ -366,7 +366,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
|
|||
_s.p(i, math::nd4j_abs<T>(_m.e<T>(i,i)));
|
||||
if(_calcU && _m.e<T>(i,i) < (T)0.) {
|
||||
auto temp = _u({0,0, i,i+1}, true);
|
||||
temp.applyTransform(transform::Neg, &temp, nullptr);
|
||||
temp.applyTransform(transform::Neg, temp, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -223,26 +223,26 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
const T almostZero = DataTypeUtils::min<T>();
|
||||
T maxElem;
|
||||
if(len == 1)
|
||||
maxElem = math::nd4j_abs<T>(diagInterval->template e<T>(0));
|
||||
maxElem = math::nd4j_abs<T>(diagInterval.template e<T>(0));
|
||||
else
|
||||
maxElem = (*diagInterval)({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e<T>(0);
|
||||
maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e<T>(0);
|
||||
T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e<T>(0);
|
||||
|
||||
T eps = math::nd4j_max<T>(almostZero, DataTypeUtils::eps<T>() * maxElem);
|
||||
T epsBig = (T)8. * DataTypeUtils::eps<T>() * math::nd4j_max<T>(maxElem0, maxElem);
|
||||
|
||||
if(diagInterval->template e<T>(0) < epsBig)
|
||||
diagInterval->p(Nd4jLong(0), epsBig);
|
||||
if(diagInterval.template e<T>(0) < epsBig)
|
||||
diagInterval.p(Nd4jLong(0), epsBig);
|
||||
|
||||
for(int i=1; i < len; ++i)
|
||||
if(math::nd4j_abs<T>(colVec0->template e<T>(i)) < eps)
|
||||
colVec0->p(i, 0.f);
|
||||
|
||||
for(int i=1; i < len; i++)
|
||||
if(diagInterval->template e<T>(i) < epsBig) {
|
||||
if(diagInterval.template e<T>(i) < epsBig) {
|
||||
deflation1(col1, shift, i, len);
|
||||
for(int i = 0; i < len; ++i)
|
||||
diagInterval->p(i, _m.e<T>(col1+shift+i,col1+shift+i));
|
||||
diagInterval.p(i, _m.e<T>(col1+shift+i,col1+shift+i));
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -261,7 +261,7 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
int p = 1;
|
||||
|
||||
for(int i=1; i<len; ++i)
|
||||
if(math::nd4j_abs<T>(diagInterval->template e<T>(i)) < almostZero)
|
||||
if(math::nd4j_abs<T>(diagInterval.template e<T>(i)) < almostZero)
|
||||
permut[p++] = i;
|
||||
|
||||
int k = 1, m = ind+1;
|
||||
|
@ -271,7 +271,7 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
permut[p] = m++;
|
||||
else if(m >= len)
|
||||
permut[p] = k++;
|
||||
else if(diagInterval->template e<T>(k) < diagInterval->template e<T>(m))
|
||||
else if(diagInterval.template e<T>(k) < diagInterval.template e<T>(m))
|
||||
permut[p] = m++;
|
||||
else
|
||||
permut[p] = k++;
|
||||
|
@ -281,7 +281,7 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
if(totDefl) {
|
||||
for(int i=1; i<len; ++i) {
|
||||
int ki = permut[i];
|
||||
if(math::nd4j_abs<T>(diagInterval->template e<T>(ki)) < almostZero || diagInterval->template e<T>(0) < diagInterval->template e<T>(ki))
|
||||
if(math::nd4j_abs<T>(diagInterval.template e<T>(ki)) < almostZero || diagInterval.template e<T>(0) < diagInterval.template e<T>(ki))
|
||||
permut[i-1] = permut[i];
|
||||
else {
|
||||
permut[i-1] = 0;
|
||||
|
@ -303,10 +303,10 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
const int ki = permut[len - (totDefl ? i+1 : i)];
|
||||
const int jac = tCol[ki];
|
||||
|
||||
T _e0 = diagInterval->template e<T>(jac);
|
||||
T _e0 = diagInterval.template e<T>(jac);
|
||||
//math::nd4j_swap<T>((*diagInterval)(i), (*diagInterval)(jac));
|
||||
diagInterval->p(jac, diagInterval->template e<T>(i));
|
||||
diagInterval->p(i, _e0);
|
||||
diagInterval.p(jac, diagInterval.template e<T>(i));
|
||||
diagInterval.p(i, _e0);
|
||||
|
||||
if(i!=0 && jac!=0) {
|
||||
_e0 = colVec0->template e<T>(jac);
|
||||
|
@ -315,7 +315,6 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
colVec0->p(i, _e0);
|
||||
}
|
||||
|
||||
NDArray* temp1 = nullptr, *temp2 = nullptr;
|
||||
if (_calcU) {
|
||||
auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true);
|
||||
auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true);
|
||||
|
@ -352,12 +351,12 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
|
|||
{
|
||||
int i = len-1;
|
||||
|
||||
while(i > 0 && (math::nd4j_abs<T>(diagInterval->template e<T>(i)) < almostZero || math::nd4j_abs<T>(colVec0->template e<T>(i)) < almostZero))
|
||||
while(i > 0 && (math::nd4j_abs<T>(diagInterval.template e<T>(i)) < almostZero || math::nd4j_abs<T>(colVec0->template e<T>(i)) < almostZero))
|
||||
--i;
|
||||
|
||||
for(; i > 1; --i) {
|
||||
if( (diagInterval->template e<T>(i) - diagInterval->template e<T>(i-1)) < DataTypeUtils::eps<T>()*maxElem ) {
|
||||
if (math::nd4j_abs<T>(diagInterval->template e<T>(i) - diagInterval->template e<T>(i-1)) >= epsBig)
|
||||
if( (diagInterval.template e<T>(i) - diagInterval.template e<T>(i-1)) < DataTypeUtils::eps<T>()*maxElem ) {
|
||||
if (math::nd4j_abs<T>(diagInterval.template e<T>(i) - diagInterval.template e<T>(i-1)) >= epsBig)
|
||||
throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !");
|
||||
deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len);
|
||||
}
|
||||
|
@@ -365,7 +364,6 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
}

delete colVec0;
delete diagInterval;
}
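diagInterval switches in this function from a heap-allocated NDArray* to a value, so its arrow accesses become dots and its delete disappears, while colVec0 stays a pointer. A toy sketch of a view returned by value that still writes through to the parent's storage, which is why nothing needs to be deleted:

    // Toy types, not the real NDArray/SVD code: the view is a small value
    // object referring to storage owned elsewhere.
    #include <memory>
    #include <vector>

    struct Storage {
        std::shared_ptr<std::vector<double>> data =
            std::make_shared<std::vector<double>>(16, 0.0);
    };

    struct DiagonalView {
        std::shared_ptr<std::vector<double>> data;  // shares the parent's buffer
        int stride;

        double e(int i) const { return (*data)[i * stride]; }
        void   p(int i, double v) { (*data)[i * stride] = v; }
    };

    // Returned by value: cheap to copy, nothing for the caller to delete.
    DiagonalView diagonal(Storage& m, int n) { return DiagonalView{m.data, n + 1}; }

    int main() {
        Storage m;                       // 4x4 matrix flattened row-major
        auto diag = diagonal(m, 4);      // old style would have returned new DiagonalView(...)
        diag.p(0, 7.0);                  // writes through to m
        return ((*m.data)[0] == 7.0) ? 0 : 1;
    }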
@ -609,9 +607,7 @@ void SVD<T>::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA
|
|||
|
||||
const T almostZero = DataTypeUtils::min<T>();
|
||||
auto col0 = _m({col1, col1+size, col1, col1+1}, true);
|
||||
auto diagP = _m({col1, col1+size, col1, col1+size}, true).diagonal('c');
|
||||
auto diag = *diagP;
|
||||
delete diagP;
|
||||
auto diag = static_cast<const NDArray&>(_m({col1, col1+size, col1, col1+size}, true).diagonal('c'));
|
||||
|
||||
diag.p(Nd4jLong(0), T(0));
|
||||
singVals = NDArrayFactory::create<T>(_m.ordering(), {size, 1}, _m.getContext());
|
||||
|
@ -730,8 +726,7 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
|
|||
auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true);
|
||||
temp.assign(0.);
|
||||
auto diag = _m.diagonal('c');
|
||||
(*diag)({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true));
|
||||
delete diag;
|
||||
diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true));
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -762,11 +757,6 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
|
|||
f.assign(_u({0,1, col1+k+1, col1+n}, true));
|
||||
}
|
||||
|
||||
// UofSVD.printIndexedBuffer();
|
||||
// VofSVD.printIndexedBuffer();
|
||||
// singVals.printIndexedBuffer();
|
||||
// printf("!! \n");
|
||||
|
||||
if (_calcV)
|
||||
_v.p(row1W+k, col1W, 1.f);
|
||||
|
||||
|
@ -789,14 +779,10 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
|
|||
temp.assign(_u({col1, col1+k+1, i, i+1}, true));
|
||||
}
|
||||
|
||||
auto temp1 = _u({col1,col1+k+1, col1,col1+1}, true);
|
||||
temp1.assign(q1 * c0);
|
||||
auto temp2 = _u({col1,col1+k+1, col2+1,col2+2}, true);
|
||||
temp2.assign(q1 * (-s0));
|
||||
auto temp3 = _u({col1+k+1,col1+n+1, col1, col1+1}, true);
|
||||
temp3.assign(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true) * s0);
|
||||
auto temp4 =_u({col1+k+1,col1+n+1, col2+1,col2+2}, true);
|
||||
temp4 *= c0;
|
||||
_u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0);
|
||||
_u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0));
|
||||
_u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast<const NDArray&>(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0);
|
||||
_u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0;
|
||||
}
|
||||
else {
|
||||
|
||||
|
@ -844,8 +830,7 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
|
|||
auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true);
|
||||
blockM = 0.f;
|
||||
auto diag = blockM.diagonal('c');
|
||||
diag->assign(singVals);
|
||||
delete diag;
|
||||
diag.assign(singVals);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -285,17 +285,17 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou
|
|||
bool cNcont = N == 1 || C->strideAt(1) == 1;
|
||||
|
||||
if(!aMcont && !aKcont) {
|
||||
pA = A->dup('f');
|
||||
pA = new NDArray(A->dup('f'));
|
||||
toDelete.push_back(pA);
|
||||
aMcont = true;
|
||||
}
|
||||
if(!bKcont && !bNcont) {
|
||||
pB = B->dup('f');
|
||||
pB = new NDArray(B->dup('f'));
|
||||
toDelete.push_back(pB);
|
||||
bKcont = true;
|
||||
}
|
||||
if(!cMcont) {
|
||||
pC = C->dup('f');
|
||||
pC = new NDArray(C->dup('f'));
|
||||
toDelete.push_back(pC);
|
||||
cMcont = true;
|
||||
}
|
||||
|
@ -418,7 +418,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, nd4j::NDArray*
|
|||
bool aNcont = N == 1 || A->strideAt(1) == 1;
|
||||
|
||||
if(!aMcont && !aNcont) {
|
||||
pA = A->dup('f');
|
||||
pA = new NDArray(A->dup('f'));
|
||||
aMcont = true;
|
||||
}
|
||||
|
||||
|
@ -866,12 +866,12 @@ NDArray* MmulHelper::mmulNxNold2(const NDArray* A, const NDArray* B, NDArray* C,
|
|||
bool cNcont = N == 1 || C->strideAt(-1) == 1;
|
||||
|
||||
if(!aMcont && !aKcont) {
|
||||
pA = A->dup('c');
|
||||
pA = new NDArray(A->dup('c'));
|
||||
toDelete.push_back(pA);
|
||||
aKcont = true;
|
||||
}
|
||||
if(!bKcont && !bNcont) {
|
||||
pB = B->dup('c');
|
||||
pB = new NDArray(B->dup('c'));
|
||||
toDelete.push_back(pB);
|
||||
bNcont = true;
|
||||
}
|
||||
|
|
|
@@ -82,7 +82,7 @@ namespace nd4j {
// now we actually apply quantization
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
rz[e] = static_cast<char>(nd4j::math::nd4j_round<float, char>(1.0f * x[e] / nd4j::math::nd4j_max<float>(amax, amin) * max_byte));
rz[e] = static_cast<char>(nd4j::math::nd4j_round<float, char>( 1.0f * static_cast<float>(x[e]) / nd4j::math::nd4j_max<float>(amax, amin) * max_byte));
}
};
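The extra static_cast<float>(x[e]) suggests the kernels no longer rely on an implicit float16/bfloat16-to-float conversion in mixed expressions and promote explicitly before doing float arithmetic; that reading is an inference from the cast, not stated in the diff. A toy half-precision type with an explicit conversion illustrates the compile-time effect (the real float16 classes are bit-level and more involved):

    #include <cstdint>

    struct half_t {
        std::uint16_t bits;
        explicit operator float() const {
            // placeholder decode: real float16 decoding works on the bit pattern
            return static_cast<float>(bits) / 256.0f;
        }
    };

    float quantizeStep(half_t x, float amax, float maxByte) {
        // return 1.0f * x / amax * maxByte;                    // would not compile: conversion is explicit
        return 1.0f * static_cast<float>(x) / amax * maxByte;   // compiles, arithmetic done in float
    }

    int main() {
        half_t h{512};   // decodes to 2.0f with the placeholder above
        return quantizeStep(h, 4.0f, 127.0f) == 63.5f ? 0 : 1;
    }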
@ -180,7 +180,7 @@ PRAGMA_OMP_ATOMIC_ARGS(write)
|
|||
for (auto e = start; e < stop; e += increment) {
|
||||
int el = x[e];
|
||||
int ael = nd4j::math::nd4j_abs<int>(el) - 1;
|
||||
z[ael] += el > 0 ? threshold : -threshold;
|
||||
z[ael] += el > 0 ? static_cast<T>(threshold) : static_cast<T>(-threshold);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@@ -32,21 +32,19 @@ namespace nd4j {
REQUIRE_TRUE(x->isR(), 0, "CRELU: input must be real type");

auto tmp = x->dup();
tmp->applyTransform(nd4j::transform::Neg, nullptr, nullptr);
tmp.applyTransform(nd4j::transform::Neg, tmp);

auto z = OUTPUT_VARIABLE(0);

helpers::concat(block.launchContext(), {x, tmp}, *z, x->rankOf()-1);
helpers::concat(block.launchContext(), {x, &tmp}, *z, x->rankOf()-1);
// NDArrayFactory<T>::concat({x, tmp}, -1, z);

// TODO: make this configurable?
double threshold = 0.0;
z->applyScalar(nd4j::scalar::RELU, threshold);
z->applyScalar(nd4j::scalar::RELU, threshold, *z);

STORE_RESULT(z);

delete tmp;

return Status::OK();
}
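With dup() returning by value, the crelu temporary becomes a stack object: it is negated in place, its address is passed only where the concat helper expects a pointer, and the trailing delete tmp; goes away. A self-contained sketch of that heap-temporary-to-stack-value migration (toy Arr type, RELU folded in as a plain loop):

    #include <vector>

    using Arr = std::vector<float>;

    void negateInPlace(Arr& a) { for (auto& v : a) v = -v; }

    // pointer-based helper, mirroring helpers::concat taking a list of array pointers
    Arr concat(const std::vector<const Arr*>& parts) {
        Arr out;
        for (const Arr* p : parts) out.insert(out.end(), p->begin(), p->end());
        return out;
    }

    Arr crelu(const Arr& x) {
        Arr tmp = x;              // was: Arr* tmp = new Arr(x);
        negateInPlace(tmp);       // was: tmp->applyTransform(Neg, nullptr, nullptr);
        Arr z = concat({&x, &tmp});
        for (auto& v : z) if (v < 0.f) v = 0.f;   // RELU with threshold 0
        return z;                 // no "delete tmp;" needed any more
    }

    int main() {
        Arr z = crelu({1.f, -2.f});
        // expected: relu([1, -2, -1, 2]) == [1, 0, 0, 2]
        return (z == Arr{1.f, 0.f, 0.f, 2.f}) ? 0 : 1;
    }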
@ -94,7 +92,7 @@ namespace nd4j {
|
|||
auto pos = dec->at(0);
|
||||
auto neg = dec->at(1);
|
||||
|
||||
pos->applyPairwiseTransform(nd4j::pairwise::Subtract, neg, epsilon, nullptr);
|
||||
pos->applyPairwiseTransform(nd4j::pairwise::Subtract, *neg, *epsilon);
|
||||
|
||||
delete tmpResult;
|
||||
delete dec;
|
||||
|
|
|
@ -31,7 +31,7 @@ namespace nd4j {
|
|||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
input->applyTransform(nd4j::transform::Cube, output, nullptr);
|
||||
input->applyTransform(nd4j::transform::Cube, *output);
|
||||
STORE_RESULT(output);
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -32,7 +32,7 @@ namespace nd4j {
|
|||
|
||||
const auto alpha = block.numT() > 0 ? T_ARG(0) : 1.f;
|
||||
|
||||
input->applyScalar(nd4j::scalar::ELU, alpha, output);
|
||||
input->applyScalar(nd4j::scalar::ELU, alpha, *output);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
input->applyTransform(nd4j::transform::HardSigmoid, output, nullptr);
|
||||
input->applyTransform(nd4j::transform::HardSigmoid, *output);
|
||||
STORE_RESULT(output);
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
input->applyTransform(nd4j::transform::HardTanh, output, nullptr);
|
||||
input->applyTransform(nd4j::transform::HardTanh, *output);
|
||||
STORE_RESULT(output);
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto z = this->getZ(block);
|
||||
|
||||
// just for lulz
|
||||
first->applyTransform(nd4j::transform::Identity, z, nullptr);
|
||||
first->applyTransform(nd4j::transform::Identity, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ namespace nd4j {
|
|||
auto x = INPUT_VARIABLE(i);
|
||||
auto z = OUTPUT_VARIABLE(i);
|
||||
|
||||
x->applyTransform(transform::Identity, z, nullptr);
|
||||
x->applyTransform(transform::Identity, *z);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ namespace nd4j {
|
|||
|
||||
float alpha = block.numT() > 0 ? T_ARG(0) : 0.01f;
|
||||
|
||||
input->applyScalar(nd4j::scalar::LeakyRELU, alpha, output);
|
||||
input->applyScalar(nd4j::scalar::LeakyRELU, alpha, *output);
|
||||
STORE_RESULT(output);
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
input->applyTransform(nd4j::transform::RationalTanh, output, nullptr);
|
||||
input->applyTransform(nd4j::transform::RationalTanh, *output);
|
||||
STORE_RESULT(output);
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
input->applyTransform(nd4j::transform::RectifiedTanh, output, nullptr);
|
||||
input->applyTransform(nd4j::transform::RectifiedTanh, *output);
|
||||
STORE_RESULT(output);
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -32,7 +32,7 @@ namespace nd4j {
|
|||
|
||||
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;
|
||||
|
||||
first->applyScalar(nd4j::scalar::RELU, scalar, z);
|
||||
first->applyScalar(nd4j::scalar::RELU, scalar, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ CONFIGURABLE_OP_IMPL(relu6, 1, 1, true, 1, 0) {
|
|||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
input->applyScalar(nd4j::scalar::RELU6, T_ARG(0), output);
|
||||
input->applyScalar(nd4j::scalar::RELU6, T_ARG(0), *output);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto first = INPUT_VARIABLE(0);
|
||||
auto z = OUTPUT_VARIABLE(0);
|
||||
|
||||
first->applyTransform(nd4j::transform::SELU, z, nullptr);
|
||||
first->applyTransform(nd4j::transform::SELU, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ namespace nd4j {
|
|||
auto first = INPUT_VARIABLE(0);
|
||||
auto z = OUTPUT_VARIABLE(0);
|
||||
|
||||
first->applyTransform(nd4j::transform::Sigmoid, z, nullptr);
|
||||
first->applyTransform(nd4j::transform::Sigmoid, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto first = INPUT_VARIABLE(0);
|
||||
auto z = OUTPUT_VARIABLE(0);
|
||||
|
||||
first->applyTransform(nd4j::transform::SoftPlus, z, nullptr);
|
||||
first->applyTransform(nd4j::transform::SoftPlus, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto first = INPUT_VARIABLE(0);
|
||||
auto z = OUTPUT_VARIABLE(0);
|
||||
|
||||
first->applyTransform(nd4j::transform::SoftSign, z, nullptr);
|
||||
first->applyTransform(nd4j::transform::SoftSign, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
auto first = INPUT_VARIABLE(0);
|
||||
auto z = OUTPUT_VARIABLE(0);
|
||||
|
||||
first->applyTransform(nd4j::transform::Tanh, z, nullptr);
|
||||
first->applyTransform(nd4j::transform::Tanh, *z);
|
||||
|
||||
STORE_RESULT(*z);
|
||||
|
||||
|
|
|
@@ -34,7 +34,7 @@ namespace nd4j {

BROADCAST_CHECK_EMPTY(x,y,z);

x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntAnd, pairwise::IntOps::IntAnd, broadcast::IntOps::IntAnd), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntAnd, pairwise::IntOps::IntAnd, broadcast::IntOps::IntAnd), *y, *z, false);

return Status::OK();
}
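The broadcastable bitwise and shift ops below all receive the same one-line change: applyTrueBroadcast takes the second operand and the target by reference, so the op bodies dereference the raw pointers they get from the variable space. A minimal sketch of that call-site change on a toy Tensor (the and-like op body is illustrative only):

    #include <vector>

    struct Tensor {
        std::vector<int> data;

        // new-style signature: references, non-nullable
        void applyTrueBroadcast(const Tensor& other, Tensor& target) const {
            target.data.resize(data.size());
            for (std::size_t i = 0; i < data.size(); ++i)
                target.data[i] = data[i] & other.data[i % other.data.size()];  // "IntAnd"-like op
        }
    };

    int main() {
        Tensor* x = new Tensor{{6, 5, 3}};   // ops receive raw pointers, much like INPUT_VARIABLE/OUTPUT_VARIABLE
        Tensor* y = new Tensor{{3}};
        Tensor* z = new Tensor{};
        x->applyTrueBroadcast(*y, *z);       // was: x->applyTrueBroadcast(..., y, z, false);
        bool ok = (z->data == std::vector<int>{2, 1, 3});
        delete x; delete y; delete z;
        return ok ? 0 : 1;
    }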
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
|
||||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntOr, pairwise::IntOps::IntOr, broadcast::IntOps::IntOr), y, z, false);
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntOr, pairwise::IntOps::IntOr, broadcast::IntOps::IntOr), *y, *z, false);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
|
||||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntXor, pairwise::IntOps::IntXor, broadcast::IntOps::IntXor), y, z, false);
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntXor, pairwise::IntOps::IntXor, broadcast::IntOps::IntXor), *y, *z, false);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
|
||||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftRight, pairwise::CyclicShiftRight, broadcast::CyclicShiftRight), y, z, false);
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftRight, pairwise::CyclicShiftRight, broadcast::CyclicShiftRight), *y, *z, false);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
|
||||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftLeft, pairwise::CyclicShiftLeft, broadcast::CyclicShiftLeft), y, z, false);
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftLeft, pairwise::CyclicShiftLeft, broadcast::CyclicShiftLeft), *y, *z, false);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
|
||||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftRight, pairwise::ShiftRight, broadcast::ShiftRight), y, z, false);
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftRight, pairwise::ShiftRight, broadcast::ShiftRight), *y, *z, false);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
|
||||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftLeft, pairwise::ShiftLeft, broadcast::ShiftLeft), y, z, false);
|
||||
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftLeft, pairwise::ShiftLeft, broadcast::ShiftLeft), *y, *z, false);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ namespace nd4j {
|
|||
|
||||
ExtraArguments arguments({a});
|
||||
|
||||
y->applyPairwiseTransform(pairwise::Axpy, x, z, &arguments);
|
||||
y->applyPairwiseTransform(pairwise::Axpy, *x, *z, &arguments);
|
||||
|
||||
return ND4J_STATUS_OK;
|
||||
}
|
||||
|
|
|
@@ -33,8 +33,12 @@ CUSTOM_OP_IMPL(svd, 1, 1, false, 0, 3) {
const int rank = x->rankOf();
REQUIRE_TRUE(rank >= 2 , 0, "SVD OP: the rank of input array must be >=2, but got %i instead!", rank);

const bool fullUV = (bool)INT_ARG(0);
bool fullUV = (bool)INT_ARG(0);
const bool calcUV = (bool)INT_ARG(1);

if(calcUV == false)
fullUV = false;

const int switchNum = INT_ARG(2);

// #ifndef __CUDABLAS__
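In the svd hunk above, fullUV loses its const so it can be cleared when calcUV is false: there is nothing to compute in "full" form if U and V are not requested at all. A tiny sketch of that flag normalization (names are local stand-ins for the op's integer arguments):

    #include <utility>

    std::pair<bool, bool> normalizeSvdFlags(int fullUVArg, int calcUVArg) {
        bool fullUV = static_cast<bool>(fullUVArg);   // was const; now mutable so it can be reset
        const bool calcUV = static_cast<bool>(calcUVArg);
        if (!calcUV)
            fullUV = false;
        return {fullUV, calcUV};
    }

    int main() {
        auto a = normalizeSvdFlags(1, 0);   // fullUV requested but calcUV off: fullUV cleared
        auto b = normalizeSvdFlags(1, 1);   // both on: kept
        return (!a.first && b.first) ? 0 : 1;
    }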
@ -29,7 +29,7 @@ namespace nd4j {
|
|||
auto x = INPUT_VARIABLE(0);
|
||||
auto z = OUTPUT_VARIABLE(0);
|
||||
|
||||
x->applyTransform(transform::Not, z, nullptr);
|
||||
x->applyTransform(transform::Not, *z);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -70,17 +70,13 @@ namespace nd4j {
|
|||
auto tadsY = y->allTensorsAlongDimension(dims);
|
||||
auto tadsZ = z->allTensorsAlongDimension(dims);
|
||||
|
||||
for (int e = 0; e < tadsX->size(); e++) {
|
||||
for (int e = 0; e < tadsX.size(); e++) {
|
||||
if (!cond->e<bool>(e)) {
|
||||
tadsZ->at(e)->assign(tadsY->at(e));
|
||||
tadsZ.at(e)->assign(tadsY.at(e));
|
||||
} else {
|
||||
tadsZ->at(e)->assign(tadsX->at(e));
|
||||
tadsZ.at(e)->assign(tadsX.at(e));
|
||||
}
|
||||
}
|
||||
|
||||
delete tadsX;
|
||||
delete tadsY;
|
||||
delete tadsZ;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -59,17 +59,13 @@ namespace nd4j {
|
|||
auto tadsY = y->allTensorsAlongDimension(dims);
|
||||
auto tadsZ = z->allTensorsAlongDimension(dims);
|
||||
|
||||
for (int e = 0; e < tadsX->size(); e++) {
|
||||
for (int e = 0; e < tadsX.size(); e++) {
|
||||
if (!condition->e<bool>(e)) {
|
||||
tadsZ->at(e)->assign(tadsY->at(e));
|
||||
tadsZ.at(e)->assign(tadsY.at(e));
|
||||
} else {
|
||||
tadsZ->at(e)->assign(tadsX->at(e));
|
||||
tadsZ.at(e)->assign(tadsX.at(e));
|
||||
}
|
||||
}
|
||||
|
||||
delete tadsX;
|
||||
delete tadsY;
|
||||
delete tadsZ;
|
||||
}
|
||||
} else {
|
||||
// in this case we return 2D matrix, which basically contains coordinates fo true
|
||||
|
|
|
@ -89,16 +89,12 @@ namespace nd4j {
|
|||
auto tadsY = y->allTensorsAlongDimension(dims);
|
||||
auto tadsZ = z->allTensorsAlongDimension(dims);
|
||||
|
||||
for (int e = 0; e < tadsX->size(); e++) {
|
||||
for (int e = 0; e < tadsX.size(); e++) {
|
||||
if (!condition->e<bool>(e))
|
||||
tadsZ->at(e)->assign(tadsY->at(e));
|
||||
tadsZ.at(e)->assign(tadsY.at(e));
|
||||
else
|
||||
tadsZ->at(e)->assign(tadsX->at(e));
|
||||
tadsZ.at(e)->assign(tadsX.at(e));
|
||||
}
|
||||
|
||||
delete tadsX;
|
||||
delete tadsY;
|
||||
delete tadsZ;
|
||||
}
|
||||
} else {
|
||||
// in this case we return 2D matrix, which basically contains coordinates fo true
|
||||
|
|
|
@@ -82,14 +82,12 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(epsNext);

if (axisY.size() > 0) {
auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(epsNext);
}
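reduceAlongDimension now returns the reduced array by value (and the reduceAlongDims spelling is folded into it elsewhere in this patch), so the delete sum; lines vanish while the assign calls keep working. A sketch of the reduce-or-pass-through pattern with value semantics, on a toy 2D tensor:

    #include <vector>

    struct Tensor2D {
        std::size_t rows, cols;
        std::vector<double> v;                     // row-major, rows*cols
    };

    // returns by value, like the new reduceAlongDimension
    Tensor2D reduceSumOverRows(const Tensor2D& t) {
        Tensor2D out{1, t.cols, std::vector<double>(t.cols, 0.0)};
        for (std::size_t r = 0; r < t.rows; ++r)
            for (std::size_t c = 0; c < t.cols; ++c)
                out.v[c] += t.v[r * t.cols + c];
        return out;
    }

    void assignGrad(Tensor2D& grad, const Tensor2D& epsNext, bool needReduce) {
        if (needReduce) {
            auto sum = reduceSumOverRows(epsNext);  // value: nothing to delete afterwards
            grad = sum;
        } else {
            grad = epsNext;
        }
    }

    int main() {
        Tensor2D eps{2, 2, {1, 2, 3, 4}}, grad{};
        assignGrad(grad, eps, true);
        return (grad.v == std::vector<double>{4, 6}) ? 0 : 1;
    }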
@ -80,7 +80,6 @@ namespace nd4j {
|
|||
if (axisY.size() > 0) {
|
||||
auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisY);
|
||||
gradY->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradY->assign(epsNext);
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ BROADCASTABLE_OP_IMPL(tf_atan2, 0, 0) {
|
|||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
// auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Atan2<T>>(y, x, z);
|
||||
x->applyTrueBroadcast(nd4j::BroadcastOpsTuple::custom(scalar::Atan2, pairwise::Atan2, broadcast::Atan2), y, z, true);
|
||||
x->applyTrueBroadcast(nd4j::BroadcastOpsTuple::custom(scalar::Atan2, pairwise::Atan2, broadcast::Atan2), *y, *z, true);
|
||||
|
||||
// if (tZ == nullptr)
|
||||
// return ND4J_STATUS_KERNEL_FAILURE;
|
||||
|
|
|
@ -81,7 +81,7 @@ namespace nd4j {
|
|||
// Y gradient
|
||||
//epsNext->applyTriplewiseLambda(x, y, lambdaY, gradY);
|
||||
gradY->assign((*epsNext) * (*x) / ((*y) * (*y)));
|
||||
gradY->applyTransform(transform::Neg, nullptr, nullptr);
|
||||
gradY->applyTransform(transform::Neg, *gradY);
|
||||
|
||||
} else if (y->isScalar()) {
|
||||
// scalar case
|
||||
|
@ -91,17 +91,17 @@ namespace nd4j {
|
|||
//tmpX.printBuffer("SumX");
|
||||
//tmp.printBuffer("Sum Eps");
|
||||
gradY->assign(tmp * tmpX / ((*y) * (*y)));
|
||||
gradY->applyTransform(transform::Neg, nullptr, nullptr);
|
||||
gradY->applyTransform(transform::Neg, *gradY);
|
||||
|
||||
//epsNext->applyLambda(lambdaS, gradX);
|
||||
epsNext->applyScalarArr(scalar::Divide, y, gradX, nullptr);
|
||||
//epsNext->applyLambda(lambdaS, *gradX);
|
||||
epsNext->applyScalarArr(scalar::Divide, *y, *gradX);
|
||||
} else {
|
||||
// broadcast case
|
||||
|
||||
auto preX = *epsNext / *y;
|
||||
|
||||
NDArray negX(*x);
|
||||
x->applyTransform(transform::Neg, &negX);
|
||||
x->applyTransform(transform::Neg, negX);
|
||||
auto preY = *epsNext * negX / ((*y) * (*y));
|
||||
|
||||
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
|
||||
|
@ -110,14 +110,12 @@ namespace nd4j {
|
|||
if (axisX.size() > 0) {
|
||||
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
|
||||
gradX->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradX->assign(preX);
|
||||
|
||||
if (axisY.size() > 0) {
|
||||
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
|
||||
gradY->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradY->assign(preY);
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ namespace nd4j {
|
|||
std::unique_ptr<ResultSet> tmpResult(op.execute({x, y}, {}, {}, {}));
|
||||
|
||||
if (gradY->rankOf() == gradX->rankOf())
|
||||
epsNext->applyPairwiseTransform(pairwise::Multiply, tmpResult->at(0), gradY, nullptr);
|
||||
epsNext->applyPairwiseTransform(pairwise::Multiply, *tmpResult->at(0), *gradY);
|
||||
else // epsNext is greater than gradY
|
||||
{
|
||||
std::vector<Nd4jLong> dims(epsNext->rankOf() * 2);
|
||||
|
@ -78,7 +78,7 @@ namespace nd4j {
|
|||
dims[d * 2 + 1] = 1;
|
||||
}
|
||||
auto tempIn((*tmpResult->at(0))(dims));
|
||||
(*epsNext)(dims).applyPairwiseTransform(pairwise::Multiply, &tempIn, gradY, nullptr);
|
||||
(*epsNext)(dims).applyPairwiseTransform(pairwise::Multiply, tempIn, *gradY);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -79,24 +79,24 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
|
|||
const Nd4jLong yLen = y->lengthOf();
|
||||
|
||||
if(x->isScalar() && y->isScalar()) { // both are scalars
|
||||
y->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdx, nullptr);
|
||||
x->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdy, nullptr);
|
||||
y->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx);
|
||||
x->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy);
|
||||
//dLdx->assign((*y) * (*dLdz));
|
||||
//dLdy->assign((*x) * (*dLdz));
|
||||
|
||||
}
|
||||
else if(x->isScalar()) { // x is scalar and y is not
|
||||
dLdx->assign((*y * *dLdz).reduceNumber(reduce::Sum));
|
||||
dLdz->applyScalarArr(scalar::Multiply, x, dLdy, nullptr);
|
||||
dLdz->applyScalarArr(scalar::Multiply, *x, *dLdy);
|
||||
//dLdz->applyTrueBroadcast(broadcast::Multiply, x, dLdy, true);
|
||||
}
|
||||
else if(y->isScalar()) { // y is scalar and x is not
|
||||
dLdy->assign((*x * *dLdz).reduceNumber(reduce::Sum));
|
||||
dLdz->applyScalarArr(scalar::Multiply, y, dLdx);
|
||||
dLdz->applyScalarArr(scalar::Multiply, *y, *dLdx);
|
||||
}
|
||||
else if(x->isSameShape(y)) {
|
||||
x->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdy, nullptr);
|
||||
y->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdx, nullptr);
|
||||
x->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy);
|
||||
y->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx);
|
||||
}
|
||||
else if (x->isSameShape(dLdz)) {
|
||||
|
||||
|
@ -104,8 +104,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
|
|||
y->tile(yTiled);
|
||||
std::vector<int> axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo());
|
||||
|
||||
dLdy->assign( (*x * *dLdz).reduceAlongDims(reduce::Sum, axesForY) );
|
||||
yTiled.applyPairwiseTransform(pairwise::Multiply, dLdz, dLdx, nullptr);
|
||||
dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) );
|
||||
yTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx);
|
||||
}
|
||||
else if (y->isSameShape(dLdz)) {
|
||||
|
||||
|
@ -113,8 +113,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
|
|||
x->tile(xTiled);
|
||||
std::vector<int> axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo());
|
||||
|
||||
dLdx->assign( (*y * *dLdz).reduceAlongDims(reduce::Sum, axesForX) );
|
||||
xTiled.applyPairwiseTransform(pairwise::Multiply, dLdz, dLdy, nullptr);
|
||||
dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) );
|
||||
xTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy);
|
||||
}
|
||||
else {
|
||||
|
||||
|
@ -125,8 +125,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
|
|||
std::vector<int> axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo());
|
||||
std::vector<int> axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo());
|
||||
|
||||
dLdx->assign( (*y * *dLdz).reduceAlongDims(reduce::Sum, axesForX) );
|
||||
dLdy->assign( (*x * *dLdz).reduceAlongDims(reduce::Sum, axesForY) );
|
||||
dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) );
|
||||
dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) );
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
@ -182,7 +182,7 @@ DECLARE_SHAPE_FN(multiply_bp) {
|
|||
T tmpX = x->template reduceNumber<simdOps::Sum<T>>();
|
||||
gradY->assign(tmpX);
|
||||
|
||||
epsNext->applyLambda(lambdaS, gradX);
|
||||
epsNext->applyLambda(lambdaS, *gradX);
|
||||
} else {
|
||||
// broadcast case
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ namespace nd4j {
|
|||
|
||||
// X gradient
|
||||
//epsNext->applyPairwiseLambda(y, lambdaX, gradX);
|
||||
epsNext->applyPairwiseTransform(pairwise::Divide, y, gradX, nullptr);
|
||||
epsNext->applyPairwiseTransform(pairwise::Divide, *y, *gradX);
|
||||
|
||||
// Y gradient
|
||||
//epsNext->applyTriplewiseLambda(x, y, lambdaY, gradY);
|
||||
|
@ -86,14 +86,14 @@ namespace nd4j {
|
|||
gradY->assign(tmp * -tmpX / ((*y) * (*y)));
|
||||
|
||||
//epsNext->applyLambda(lambdaS, gradX);
|
||||
epsNext->applyScalarArr(scalar::Divide, y, gradX, nullptr);
|
||||
epsNext->applyScalarArr(scalar::Divide, *y, *gradX);
|
||||
} else {
|
||||
// broadcast case
|
||||
|
||||
auto preX = *epsNext / *y;
|
||||
|
||||
NDArray negX(*x);
|
||||
x->applyTransform(transform::Neg, &negX);
|
||||
x->applyTransform(transform::Neg, negX);
|
||||
auto preY = *epsNext * negX / ((*y) * (*y));
|
||||
|
||||
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
|
||||
|
@ -102,14 +102,12 @@ namespace nd4j {
|
|||
if (axisX.size() > 0) {
|
||||
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
|
||||
gradX->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradX->assign(preX);
|
||||
|
||||
if (axisY.size() > 0) {
|
||||
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
|
||||
gradY->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradY->assign(preY);
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace nd4j {
|
|||
BROADCAST_CHECK_EMPTY(x,y,z);
|
||||
|
||||
REQUIRE_TRUE(!x->isB(), 0, "REVERSEDIVIDE OP: you can't divide by bool array!");
|
||||
x->applyTrueBroadcast(BROADCAST(ReverseDivide), y, z, true);
|
||||
x->applyTrueBroadcast(BROADCAST(ReverseDivide), *y, *z, true);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ namespace nd4j {
|
|||
// X gradient
|
||||
//epsNext->applyTriplewiseLambda(x, y, lambdaX, gradX);
|
||||
gradX->assign((*epsNext) * (*y) / ((*x) * (*x)));
|
||||
gradX->applyTransform(transform::Neg, nullptr, nullptr);
|
||||
gradX->applyTransform(transform::Neg, *gradX);
|
||||
// Y gradient
|
||||
//epsNext->applyPairwiseLambda(x, lambdaY, gradY);
|
||||
gradY->assign((*epsNext) / (*x));
|
||||
|
@ -78,14 +78,14 @@ namespace nd4j {
|
|||
gradY->assign(tmp / tmpX);
|
||||
|
||||
gradX->assign((*epsNext) * (*y) / ((*x) * (*x)));
|
||||
gradX->applyTransform(transform::Neg, nullptr, nullptr);
|
||||
gradX->applyTransform(transform::Neg, *gradX);
|
||||
} else {
|
||||
// broadcast case
|
||||
|
||||
auto preY = (*epsNext) / (*x);
|
||||
|
||||
auto preX = *epsNext * (*y) / ((*x) * (*x));
|
||||
preX.applyTransform(transform::Neg, nullptr, nullptr);
|
||||
preX.applyTransform(transform::Neg, preX);
|
||||
|
||||
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
|
||||
auto axisY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), epsNext->shapeInfo());
|
||||
|
@ -93,14 +93,12 @@ namespace nd4j {
|
|||
if (axisX.size() > 0) {
|
||||
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
|
||||
gradX->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradX->assign(preX);
|
||||
|
||||
if (axisY.size() > 0) {
|
||||
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
|
||||
gradY->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradY->assign(preY);
|
||||
}
|
||||
|
|
|
@ -61,13 +61,13 @@ namespace nd4j {
|
|||
|
||||
if (x->isSameShape(y)) {
|
||||
// PWT case case
|
||||
epsNext->applyTransform(transform::Neg, gradX, nullptr);
|
||||
epsNext->applyTransform(transform::Neg, *gradX);
|
||||
gradY->assign(epsNext);
|
||||
} else if (y->isScalar()) {
|
||||
// scalar case
|
||||
auto tmp = epsNext->reduceNumber(reduce::Sum);
|
||||
gradY->assign(tmp);
|
||||
epsNext->applyTransform(transform::Neg, gradX, nullptr);
|
||||
epsNext->applyTransform(transform::Neg, *gradX);
|
||||
} else {
|
||||
// broadcastable
|
||||
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
|
||||
|
@ -75,16 +75,14 @@ namespace nd4j {
|
|||
|
||||
if (axisX.size() > 0) {
|
||||
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisX);
|
||||
sum->applyTransform(transform::Neg, gradX);
|
||||
delete sum;
|
||||
sum.applyTransform(transform::Neg, *gradX);
|
||||
} else {
|
||||
epsNext->applyTransform(transform::Neg, gradX, nullptr);
|
||||
epsNext->applyTransform(transform::Neg, *gradX);
|
||||
}
|
||||
|
||||
if (axisY.size() > 0) {
|
||||
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisY);
|
||||
gradY->assign(sum);
|
||||
delete sum;
|
||||
} else {
|
||||
gradY->assign(epsNext);
|
||||
}
|
||||
|
|
|
@ -98,37 +98,31 @@ namespace nd4j {
|
|||
|
||||
auto targetShape = epsNext->getShapeAsVector();
|
||||
|
||||
preX->tileToShape(targetShape);
|
||||
preY->tileToShape(targetShape);
|
||||
preX.tileToShape(targetShape, preX);
|
||||
preY.tileToShape(targetShape, preY);
|
||||
|
||||
|
||||
//epsNext->applyTriplewiseLambda(x, y, lambdaX, preX);
|
||||
//epsNext->applyTriplewiseLambda(x, y, lambdaY, preY);
|
||||
auto resX = (*epsNext) * ts * ((*x) - (*y));
|
||||
preX->assign(resX);
|
||||
preX.assign(resX);
|
||||
auto resY = (*epsNext) * ts * ((*y) - (*x));
|
||||
preY->assign(resY);
|
||||
preY.assign(resY);
|
||||
|
||||
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
|
||||
auto axisY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), epsNext->shapeInfo());
|
||||
|
||||
if (axisX.size() > 0) {
|
||||
auto sum = preX->reduceAlongDimension(reduce::Sum, axisX);
|
||||
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
|
||||
gradX->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradX->assign(preX);
|
||||
|
||||
if (axisY.size() > 0) {
|
||||
auto sum = preY->reduceAlongDimension(reduce::Sum, axisY);
|
||||
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
|
||||
gradY->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradY->assign(preY);
|
||||
|
||||
|
||||
delete preX;
|
||||
delete preY;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -62,7 +62,7 @@ namespace nd4j {
|
|||
|
||||
if (x->isSameShape(y)) {
|
||||
// PWT case case
|
||||
epsNext->applyTransform(transform::Neg, gradY, nullptr);
|
||||
epsNext->applyTransform(transform::Neg, *gradY);
|
||||
gradX->assign(epsNext);
|
||||
} else if (y->isScalar()) {
|
||||
// scalar case
|
||||
|
@ -77,16 +77,14 @@ namespace nd4j {
|
|||
if (axisX.size() > 0) {
|
||||
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisX);
|
||||
gradX->assign(sum);
|
||||
delete sum;
|
||||
} else
|
||||
gradX->assign(epsNext);
|
||||
|
||||
if (axisY.size() > 0) {
|
||||
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisY);
|
||||
sum->applyTransform(transform::Neg, gradY);
|
||||
delete sum;
|
||||
sum.applyTransform(transform::Neg, *gradY);
|
||||
} else {
|
||||
epsNext->applyTransform(transform::Neg, gradY);
|
||||
epsNext->applyTransform(transform::Neg, *gradY);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,10 +41,10 @@ namespace nd4j {
|
|||
// but we'll ensure only one node is active, and other is disabled
|
||||
if (condition->e<int>(0) == 0) {
|
||||
block.setBranch(0);
|
||||
this->storeResult(block, 0, input->dup());
|
||||
this->storeResult(block, 0, new NDArray(input->dup()));
|
||||
} else {
|
||||
block.setBranch(1);
|
||||
this->storeResult(block, 1, *input->dup());
|
||||
this->storeResult(block, 1, new NDArray(input->dup()));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -42,34 +42,34 @@ namespace nd4j {
|
|||
std::unique_ptr<NDArray> ptr;
|
||||
if (!Environment::getInstance()->isExperimentalBuild()) {
|
||||
if (y->dataType() != x->dataType()) {
|
||||
y = y->cast(x->dataType());
|
||||
y = new NDArray(y->cast(x->dataType()));
|
||||
std::unique_ptr<NDArray> ptr2(y);
|
||||
ptr.swap(ptr2);
|
||||
}
|
||||
}
|
||||
|
||||
if (!x->isScalar() && !y->isScalar() && x->isSameShape(y)) {
|
||||
x->applyPairwiseTransform(op.p, y, z, nullptr);
|
||||
x->applyPairwiseTransform(op.p, *y, *z);
|
||||
} else if (!x->isScalar() && y->isScalar()) {
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z);
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
|
||||
} else if (x->isScalar() && !y->isScalar()) {
|
||||
if (z->isSameShape(y)) {
|
||||
if (op.s == scalar::Add || op.s == scalar::Multiply ) {
|
||||
y->applyScalarArr(op.s, x, z, nullptr);
|
||||
y->applyScalarArr(op.s, *x, *z);
|
||||
} else if (op.s == scalar::SquaredSubtract) {
|
||||
y->applyScalarArr(scalar::SquaredReverseSubtract, x, z, nullptr);
|
||||
y->applyScalarArr(scalar::SquaredReverseSubtract, *x, *z);
|
||||
} else if (op.s == scalar::Subtract) {
|
||||
y->applyScalarArr(scalar::ReverseSubtract, x, z, nullptr);
|
||||
y->applyScalarArr(scalar::ReverseSubtract, *x, *z);
|
||||
} else if (op.s == scalar::Divide) {
|
||||
y->applyScalarArr(scalar::ReverseDivide, x, z, nullptr);
|
||||
y->applyScalarArr(scalar::ReverseDivide, *x, *z);
|
||||
} else if (op.s == scalar::Pow) {
|
||||
y->applyScalarArr(scalar::ReversePow, x, z, nullptr);
|
||||
y->applyScalarArr(scalar::ReversePow, *x, *z);
|
||||
} else if (op.s == scalar::ReverseSubtract) {
|
||||
y->applyScalarArr(scalar::Subtract, x, z, nullptr);
|
||||
y->applyScalarArr(scalar::Subtract, *x, *z);
|
||||
} else if (op.s == scalar::ReverseDivide) {
|
||||
y->applyScalarArr(scalar::Divide, x, z, nullptr);
|
||||
y->applyScalarArr(scalar::Divide, *x, *z);
|
||||
} else if (op.s == scalar::MaxPairwise || op.s == scalar::MinPairwise || op.s == scalar::AMaxPairwise || op.s == scalar::AMinPairwise) {
|
||||
y->applyScalarArr(op.s, x, z, nullptr);
|
||||
y->applyScalarArr(op.s, *x, *z);
|
||||
} else if (op.s == scalar::CopyPws) {
|
||||
z->assign(y);
|
||||
} else {
|
||||
|
@ -84,9 +84,9 @@ namespace nd4j {
|
|||
return tZ;
|
||||
}
|
||||
} else if (x->isScalar() && y->isScalar()) { // x->isScalar() && y->isScalar()
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z, nullptr);
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
|
||||
} else if (ShapeUtils::areShapesBroadcastable(*x, *y)) {
|
||||
x->applyTrueBroadcast(op, y, z, true, extraArgs);
|
||||
x->applyTrueBroadcast(op, *y, *z, true, extraArgs);
|
||||
return z;
|
||||
} else {
|
||||
auto sx = ShapeUtils::shapeAsString(x);
|
||||
|
@ -107,16 +107,16 @@ namespace nd4j {
|
|||
}
|
||||
|
||||
if (!x->isScalar() && !y->isScalar() && x->isSameShape(y)) {
|
||||
x->applyPairwiseTransform(op.p, y, z, nullptr);
|
||||
x->applyPairwiseTransform(op.p, *y, *z);
|
||||
} else if (ShapeUtils::areShapesBroadcastable(*x, *y)) {
|
||||
x->applyTrueBroadcast(op, y, z, true, extraArgs);
|
||||
x->applyTrueBroadcast(op, *y, *z, true, extraArgs);
|
||||
return z;
|
||||
} else if (!x->isScalar() && y->isScalar()) {
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z);
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
|
||||
} else if (x->isScalar() && !y->isScalar()) {
|
||||
if (z->isSameShape(y)) {
|
||||
//z->assign(x);
|
||||
x->applyPairwiseTransform(op.p, y, z, extraArgs);
|
||||
x->applyPairwiseTransform(op.p, *y, *z, extraArgs);
|
||||
return z;
|
||||
} else {
|
||||
auto v = y->getShapeAsVector();
|
||||
|
@ -125,9 +125,9 @@ namespace nd4j {
|
|||
return tZ;
|
||||
}
|
||||
} else if (x->isScalar() && y->isScalar()) { // x->isScalar() && y->isScalar()
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z, nullptr);
|
||||
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
|
||||
} else if (ShapeUtils::areShapesBroadcastable(*x, *y)) {
|
||||
x->applyTrueBroadcast(op, y, z, true, extraArgs);
|
||||
x->applyTrueBroadcast(op, *y, *z, true, extraArgs);
|
||||
return z;
|
||||
} else {
|
||||
auto sx = ShapeUtils::shapeAsString(x);
|
||||
|
|
|
@ -51,12 +51,12 @@ namespace nd4j {
|
|||
|
||||
std::vector<int> axis = ShapeUtils::evalDimsToExclude(array->rankOf(), {0});
|
||||
auto tads = array->allTensorsAlongDimension( axis);
|
||||
for (int e = 0; e < tads->size(); e++) {
|
||||
for (int e = 0; e < tads.size(); e++) {
|
||||
auto idx = indices->e<int>(e);
|
||||
if (idx >= tads->size())
|
||||
if (idx >= tads.size())
|
||||
return ND4J_STATUS_BAD_ARGUMENTS;
|
||||
|
||||
auto arr = tads->at(e)->dup(array->ordering());
|
||||
auto arr = new NDArray(tads.at(e)->dup(array->ordering()));
|
||||
auto res = list->write(idx, arr);
|
||||
if (res != ND4J_STATUS_OK)
|
||||
return res;
|
||||
|
@ -65,7 +65,6 @@ namespace nd4j {
|
|||
if (!hasList)
|
||||
//OVERWRITE_RESULT(list);
|
||||
setupResultList(list, block);
|
||||
delete tads;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -66,7 +66,7 @@ namespace nd4j {
|
|||
|
||||
auto subarray = (*array)(indices);
|
||||
|
||||
auto status = list->write(e, subarray.dup(array->ordering()));
|
||||
auto status = list->write(e, new NDArray(subarray.dup(array->ordering())));
|
||||
|
||||
if (status != ND4J_STATUS_OK)
|
||||
return status;
|
||||
|
|
|
@ -39,7 +39,7 @@ namespace nd4j {
|
|||
//nd4j_printf("Writing [%i]:\n", idx->e<int>(0));
|
||||
//input->printShapeInfo("input shape");
|
||||
//input->printIndexedBuffer("input buffer");
|
||||
Nd4jStatus result = list->write(idx->e<int>(0), input->dup());
|
||||
Nd4jStatus result = list->write(idx->e<int>(0), new NDArray(input->dup()));
|
||||
|
||||
auto res = NDArrayFactory::create_(list->counter(), block.launchContext());
|
||||
//res->printShapeInfo("Write_list 2 output shape");
|
||||
|
@ -52,7 +52,7 @@ namespace nd4j {
|
|||
auto input = INPUT_VARIABLE(1);
|
||||
auto idx = INT_ARG(0);
|
||||
|
||||
Nd4jStatus result = list->write(idx, input->dup());
|
||||
Nd4jStatus result = list->write(idx, new NDArray(input->dup()));
|
||||
|
||||
auto res = NDArrayFactory::create_(list->counter(), block.launchContext());
|
||||
//res->printShapeInfo("Write_list 1 output shape");
|
||||
|
|
|
@ -169,10 +169,10 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
|
|||
NDArray E = *predictions - *labels;
|
||||
|
||||
// dE_i/dp_i = sign(p_i - y_i)
|
||||
E.applyTransform(nd4j::transform::Sign, dLdp); // dE/dp
|
||||
E.applyTransform(nd4j::transform::Sign, *dLdp); // dE/dp
|
||||
// dE_i/dy_i = -sign(p_i - y_i)
|
||||
|
||||
E.applyTransform(nd4j::transform::Abs);
|
||||
E.applyTransform(nd4j::transform::Abs, E);
|
||||
|
||||
switch (reductionMode) {
|
||||
|
||||
|
@ -184,7 +184,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
|
|||
dLdw->assign(E.reduceNumber(reduce::Sum));
|
||||
else if(weights != weightsBroad) {
|
||||
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
|
||||
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
|
||||
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
|
||||
}
|
||||
else
|
||||
dLdw->assign(E);
|
||||
|
@ -210,7 +210,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
|
|||
*dLdw = 0.;
|
||||
else if(weights != weightsBroad) {
|
||||
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
|
||||
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
|
||||
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
|
||||
}
|
||||
else
|
||||
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
|
||||
|
@ -238,7 +238,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
|
|||
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
|
||||
else if(weights != weightsBroad) {
|
||||
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
|
||||
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
|
||||
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
|
||||
*dLdw /= numOfNonZeroWeightsScalar;
|
||||
}
|
||||
else
|
||||
|
|
|
@ -56,7 +56,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss, 3, 1, false, 0, 2) {
|
|||
REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(*weights, *output), 0, "COSINE_DISTANCE_LOSS OP: shapes of weights and output arrays should be broadcastable, but got weights = %s and output = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(labels).c_str());
|
||||
}
|
||||
|
||||
NDArray E = 1. - (*predictions * *labels).reduceAlongDims(reduce::Sum, {dim}, true);
|
||||
NDArray E = 1. - (*predictions * *labels).reduceAlongDimension(reduce::Sum, {dim}, true);
|
||||
|
||||
// perform weights broadcasting/tile to E if it is necessary
|
||||
auto weightsBroad = weights;
|
||||
|
@ -194,7 +194,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
|
|||
// input dimension can't be larger than labels/predictions/weights rank
|
||||
REQUIRE_TRUE(dim < labels->rankOf(), 0, "COSINE_DISTANCE_LOSS_GRAD OP: input reduction dimension (got %i) must be < labels rank %i!", dim, labels->rankOf());
|
||||
|
||||
NDArray E = 1. - (*predictions * *labels).reduceAlongDims(reduce::Sum, {dim}, true);
|
||||
NDArray E = 1. - (*predictions * *labels).reduceAlongDimension(reduce::Sum, {dim}, true);
|
||||
|
||||
// perform weights broadcasting/tile to E if it is necessary
|
||||
auto weightsBroad = weights;
|
||||
|
@ -216,7 +216,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
|
|||
else {
|
||||
if(weights != weightsBroad) {
|
||||
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
|
||||
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
|
||||
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
|
||||
}
|
||||
else
|
||||
dLdw->assign(E);
|
||||
|
@ -249,7 +249,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
|
|||
|
||||
if(weights != weightsBroad) {
|
||||
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
|
||||
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
|
||||
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
|
||||
}
|
||||
else
|
||||
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
|
||||
|
@ -284,7 +284,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
|
|||
|
||||
if(weights != weightsBroad) {
|
||||
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
|
||||
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
|
||||
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
|
||||
*dLdw /= numOfNonZeroWeights;
|
||||
}
|
||||
else
|
||||
|
|
|
@@ -52,7 +52,7 @@ namespace nd4j {
// We first need to convert binary labels to -1/1 labels (as floats)
NDArray E = 1.f - (*labels * 2.f - 1.f) * (*logits);
- E.applyScalar(scalar::RELU, 0.0f, &E);
+ E.applyScalar(scalar::RELU, 0.0f, E);

// multiply E on weights
E *= *weightsBroad;

@@ -172,11 +172,11 @@ namespace nd4j {
NDArray z = (*labels * 2.f - 1.f);

NDArray E = 1.f - z * (*logits);
- E.applyScalar(scalar::RELU, 0.0f, &E);
+ E.applyScalar(scalar::RELU, 0.0f, E);
// turn E into gradient mask

NDArray gradientMask(E.getShapeInfo(), block.getWorkspace());
- E.applyTransform(nd4j::transform::Sign, &gradientMask);
+ E.applyTransform(nd4j::transform::Sign, gradientMask);

dLdp->assign(-z * gradientMask);
dLdl->assign(-2.f * (*logits) * gradientMask);

@@ -192,7 +192,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -220,7 +220,7 @@ namespace nd4j {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -249,7 +249,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

@@ -53,9 +53,9 @@ CUSTOM_OP_IMPL(huber_loss, 3, 1, false, 1, 1) {
weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo()));

auto error = *predictions - *labels;
- error.applyTransform(transform::Abs);
+ error.applyTransform(transform::Abs, error);
NDArray quadratic(error.getShapeInfo(), block.getWorkspace());
- error.applyScalar(scalar::MinPairwise, delta, &quadratic);
+ error.applyScalar(scalar::MinPairwise, delta, quadratic);

NDArray E = quadratic * quadratic * 0.5f + (error - quadratic)*delta;

@@ -173,24 +173,24 @@ DECLARE_SHAPE_FN(huber_loss) {
NDArray diff = *predictions - *labels;
NDArray absDiff(diff);
- absDiff.applyTransform(transform::Abs);
+ absDiff.applyTransform(transform::Abs, absDiff);
NDArray quadratic(absDiff);
- absDiff.applyScalar(scalar::MinPairwise, delta, &quadratic);
+ absDiff.applyScalar(scalar::MinPairwise, delta, quadratic);

NDArray E = quadratic * quadratic * 0.5f + (absDiff - quadratic)*delta;

NDArray lteMask(diff.getShapeInfo(), BOOL, true, block.launchContext());
- absDiff.applyScalar(scalar::LessThanOrEqual, delta, &lteMask);
+ absDiff.applyScalar(scalar::LessThanOrEqual, delta, lteMask);

NDArray gtMask(diff.getShapeInfo(), BOOL, true, block.launchContext());
- absDiff.applyScalar(scalar::GreaterThan, delta, &gtMask);
+ absDiff.applyScalar(scalar::GreaterThan, delta, gtMask);

NDArray signDiff(diff);
- diff.applyTransform(transform::Sign, &signDiff);
+ diff.applyTransform(transform::Sign, signDiff);

- auto gtMaskFloat = *gtMask.cast(diff.dataType());
- auto lteMaskFloat = *lteMask.cast(diff.dataType());
+ auto gtMaskFloat = gtMask.cast(diff.dataType());
+ auto lteMaskFloat = lteMask.cast(diff.dataType());

dLdp->assign( lteMaskFloat * diff + gtMaskFloat * delta * signDiff);

@@ -207,7 +207,7 @@ DECLARE_SHAPE_FN(huber_loss) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -235,7 +235,7 @@ DECLARE_SHAPE_FN(huber_loss) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -264,7 +264,7 @@ DECLARE_SHAPE_FN(huber_loss) {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

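huber_loss shows the same convention applied to the unary helpers: `applyTransform` and `applyScalar` now take the target by reference, and in-place use simply passes the array itself. A hedged sketch follows, with the same caveat that the includes, the factory call and the chosen `delta` are assumptions for the example, not code from the PR.

```cpp
#include <NDArray.h>
#include <NDArrayFactory.h>

using namespace nd4j;

int main() {
    auto error = NDArrayFactory::create<float>('c', {2, 2}, {-1.5f, 0.5f, -0.25f, 2.f});
    const float delta = 1.f;                        // clipping threshold, as in huber_loss

    // in-place call: the array itself is the target reference
    error.applyTransform(transform::Abs, error);

    // out-of-place call: the clipped values go into a separate target array
    NDArray quadratic(error);                       // same shape/type as error
    error.applyScalar(scalar::MinPairwise, delta, quadratic);

    quadratic.printIndexedBuffer("min(|error|, delta)");
    return 0;
}
```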
@@ -181,7 +181,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
// dE_i/dp_i = (1-y_i)/(1-p_i+eps) - y_i/(p_i+eps)
dLdp->assign(oneMinusLabels / onePlusEpsMinusPredict - *labels / predictPlusEps); // dE/dp
// dE_i/dy_i = log((1+2eps)/(p_i+eps) - 1)
- ((1. + 2. * epsilon) / predictPlusEps - 1.).applyTransform(transform::Log, dLdl); // dE/dy
+ ((1. + 2. * epsilon) / predictPlusEps - 1.).applyTransform(transform::Log, *dLdl); // dE/dy

NDArray E = -(*labels) * predictPlusEps.transform(transform::Log) - oneMinusLabels * onePlusEpsMinusPredict.transform(transform::Log);

@@ -196,7 +196,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -226,7 +226,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -254,7 +254,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeights);
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

@@ -55,9 +55,9 @@ namespace ops {
NDArray E(labels->getShapeInfo(), block.getWorkspace());
if (computeFullLoss)
- labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, log_predictions, &E, nullptr);
+ labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E);
else
- labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);
+ labels->applyPairwiseTransform(pairwise::LogPoissonLoss, *log_predictions, E);

// multiply E on weights

@@ -176,13 +176,13 @@ namespace ops {
NDArray E(labels->getShapeInfo(), block.getWorkspace());
if (computeFullLoss) {
- labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, log_predictions, &E, nullptr);
+ labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E);

NDArray rDiv(labels->getShapeInfo(), block.getWorkspace());
- labels->applyScalar(scalar::ReverseDivide, 0.5f, &rDiv);
+ labels->applyScalar(scalar::ReverseDivide, 0.5f, rDiv);
dLdl->assign(rDiv + labels->transform(transform::Log) + -(*log_predictions));
} else {
- labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);
+ labels->applyPairwiseTransform(pairwise::LogPoissonLoss, *log_predictions, E);

dLdl->assign(-(*log_predictions));
}

@@ -200,7 +200,7 @@ namespace ops {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -228,7 +228,7 @@ namespace ops {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -257,7 +257,7 @@ namespace ops {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

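The log-Poisson kernels illustrate the reworked `applyPairwiseTransform`: the second operand and the target are references, and the trailing extra-arguments pointer is gone. A small illustrative sketch is below; the includes and the factory call are assumptions, and `SquaredSubtract` is borrowed from the mean_sqerr_loss change later in this diff purely as a convenient pairwise op.

```cpp
#include <NDArray.h>
#include <NDArrayFactory.h>

using namespace nd4j;

int main() {
    auto a = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
    auto b = NDArrayFactory::create<float>('c', {2, 2}, {4.f, 3.f, 2.f, 1.f});

    // new form: operand and target are references, no trailing nullptr
    NDArray e(a);                                              // target with matching shape
    a.applyPairwiseTransform(pairwise::SquaredSubtract, b, e); // e = (a - b)^2

    e.printIndexedBuffer("(a - b)^2");
    return 0;
}
```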
@@ -112,10 +112,10 @@ namespace nd4j {
auto n = double(labels->sizeAt(1));
auto diffs = *predictions - *labels;

- auto sumOfSquares = (diffs * diffs).reduceAlongDims(reduce::Sum, reductionIdx, true);
+ auto sumOfSquares = (diffs * diffs).reduceAlongDimension(reduce::Sum, reductionIdx, true);

- auto squareOfSum = diffs.reduceAlongDims(reduce::Sum, reductionIdx, true);
- squareOfSum.applyScalar(scalar::Pow, 2);
+ auto squareOfSum = diffs.reduceAlongDimension(reduce::Sum, reductionIdx, true);
+ squareOfSum.applyScalar(scalar::Pow, 2, squareOfSum);

auto E = ((sumOfSquares * n) - squareOfSum) * (4/(n*(n-1)));

@@ -240,15 +240,15 @@ namespace nd4j {
auto diffs = *predictions - *labels;

std::vector<int> reductionIdx = ShapeUtils::evalDimsToExclude(labels->rankOf(), {0});
- auto sumOfSquares = (diffs * diffs).reduceAlongDims(reduce::Sum, reductionIdx, true);
+ auto sumOfSquares = (diffs * diffs).reduceAlongDimension(reduce::Sum, reductionIdx, true);

- auto squareOfSum = diffs.reduceAlongDims(reduce::Sum, reductionIdx, true);
- squareOfSum.applyScalar(scalar::Pow, 2);
+ auto squareOfSum = diffs.reduceAlongDimension(reduce::Sum, reductionIdx, true);
+ squareOfSum.applyScalar(scalar::Pow, 2, squareOfSum);

auto E = ((sumOfSquares * n) - squareOfSum) * (4/(n*(n-1)));

- auto sumPred = predictions->reduceAlongDims(reduce::Sum, reductionIdx, true);
- auto sumLabel = labels->reduceAlongDims(reduce::Sum, reductionIdx, true);
+ auto sumPred = predictions->reduceAlongDimension(reduce::Sum, reductionIdx, true);
+ auto sumLabel = labels->reduceAlongDimension(reduce::Sum, reductionIdx, true);

dLdp->assign(((diffs * n) - sumPred + sumLabel)*(8/(n*(n-1))));

@@ -273,7 +273,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -299,7 +299,7 @@ namespace nd4j {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -327,7 +327,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

@@ -51,7 +51,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss, 3, 1, false, 0, 1) {
weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo()));

NDArray E(labels->getShapeInfo(), false, block.launchContext());
- predictions->applyPairwiseTransform(pairwise::SquaredSubtract, labels, &E, nullptr);
+ predictions->applyPairwiseTransform(pairwise::SquaredSubtract, *labels, E);

// multiply E on weights
E *= (*weightsBroad);

@@ -191,7 +191,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -217,7 +217,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -245,7 +245,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

@@ -56,7 +56,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss, 3, 1, false, 1, 1) {
auto newLabels = labels;
if(labelsSmoothing != 0.) {
newLabels = new NDArray(*labels);
- newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing, newLabels, nullptr);
+ newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing, *newLabels);
}

NDArray E(labels, false, block.launchContext());

@@ -186,7 +186,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
auto newLabels = labels;
if(labelsSmoothing.e<float>(0) != 0.f) {
newLabels = new NDArray(*labels);
- newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing.e<float>(0), newLabels, nullptr);
+ newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing.e<float>(0), *newLabels);
}

NDArray E(labels, false, block.launchContext());

@@ -211,7 +211,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -239,7 +239,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum * sum));

@@ -267,7 +267,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeightsScalar);
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

@@ -54,11 +54,11 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss, 3, 1, false, 1, 1) {
// If label_smoothing is nonzero, smooth the labels towards 1/num_classes: new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes
// num_classes = labels->sizeAt(1)
- auto cLabels = labels->cast(weights->dataType());
- auto newLabels = cLabels;
+ NDArray* cLabels = new NDArray(labels->cast(weights->dataType()));
+ NDArray* newLabels = cLabels;
if(labelsSmoothing != 0.) {
newLabels = new NDArray(cLabels);
- *newLabels = (1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1);
+ newLabels->assign((1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1));
}

// main formula: result = - sum_i(lables_i * log(softmax_i)) - sum over last dimension

@@ -70,9 +70,9 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss, 3, 1, false, 1, 1) {
std::vector<int> dimensions = {-1};
- NDArray shiftedLogits = *logits - logits->reduceAlongDims(reduce::Max, dimensions, true);
- NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDims(reduce::Sum, dimensions, true).transform(transform::Log);
- NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDims(reduce::Sum, dimensions);
+ NDArray shiftedLogits = *logits - logits->reduceAlongDimension(reduce::Max, dimensions, true);
+ NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDimension(reduce::Sum, dimensions, true).transform(transform::Log);
+ NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDimension(reduce::Sum, dimensions);

// perform weights broadcasting/tile to E if it is necessary
auto weightsBroad = weights;

@@ -217,25 +217,25 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
// If label_smoothing is nonzero, smooth the labels towards 1/num_classes: new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes
// num_classes = labels->sizeAt(1)
- auto cLabels = labels->cast(weights->dataType());
- auto newLabels = cLabels;
+ NDArray* cLabels = new NDArray(labels->cast(weights->dataType()));
+ NDArray* newLabels = cLabels;
if(labelsSmoothing != 0.) {
newLabels = new NDArray(labels->getShapeInfo(), dLdl->dataType(), false, block.launchContext());
newLabels->assign((1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1));
}

- NDArray softmax = (*logits - logits->reduceAlongDims(reduce::Max, dimensions, true)).transform(transform::Exp);
- softmax /= softmax.reduceAlongDims(reduce::Sum, dimensions, true);
+ NDArray softmax = (*logits - logits->reduceAlongDimension(reduce::Max, dimensions, true)).transform(transform::Exp);
+ softmax /= softmax.reduceAlongDimension(reduce::Sum, dimensions, true);

// dEdp = softmax * sum_i(lables_i) - labels
- dLdp->assign(softmax * newLabels->reduceAlongDims(reduce::Sum, dimensions, true) - *newLabels);
+ dLdp->assign(softmax * newLabels->reduceAlongDimension(reduce::Sum, dimensions, true) - *newLabels);

// dEdl = -log(softmax)
dLdl->assign(-softmax.transform(transform::Log)* (1.f - labelsSmoothing));

- NDArray shiftedLogits = *logits - logits->reduceAlongDims(reduce::Max, dimensions, true);
- NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDims(reduce::Sum, dimensions, true).transform(transform::Log);
- NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDims(reduce::Sum, dimensions);
+ NDArray shiftedLogits = *logits - logits->reduceAlongDimension(reduce::Max, dimensions, true);
+ NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDimension(reduce::Sum, dimensions, true).transform(transform::Log);
+ NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDimension(reduce::Sum, dimensions);

// perform weights broadcasting/tile to E if it is necessary
auto weightsBroad = weights;

@@ -253,12 +253,12 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
*dLdl *= *weights;
}
else {
- dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, weightsBroad);
- dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, weightsBroad);
+ dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, *weightsBroad, *dLdp);
+ dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, *weightsBroad, *dLdl);

if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);

@@ -289,12 +289,12 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
else {

NDArray temp = *weightsBroad / sum;
- dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
- dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
+ dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdp);
+ dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdl);

if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));

@@ -326,12 +326,12 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
}
else {
NDArray temp = *weightsBroad / numOfNonZeroWeights;
- dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
- dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
+ dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdp);
+ dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdl);

if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
- E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
+ E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeights;
}
else

@@ -41,11 +41,11 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_with_logits, 2, 1, false, 0, 0) {
std::vector<int> dimension = {classesDim};

- auto maxAlongDim = logits->reduceAlongDims(reduce::Max, {classesDim}, true);
+ auto maxAlongDim = logits->reduceAlongDimension(reduce::Max, {classesDim}, true);
auto logExp = (*logits - maxAlongDim).transform(transform::Exp);
- auto logSoftMax = ( logExp / logExp.reduceAlongDims(reduce::Sum, {classesDim}, true) ).transform(transform::Log);
+ auto logSoftMax = ( logExp / logExp.reduceAlongDimension(reduce::Sum, {classesDim}, true) ).transform(transform::Log);

- (-(*labels) * logSoftMax).reduceAlongDimension(reduce::Sum, output, dimension);
+ (-(*labels) * logSoftMax).reduceAlongDimension(reduce::Sum, *output, dimension);

return Status::OK();
}

@@ -97,14 +97,14 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_with_logits_grad, 2, 2, false, 0, 0) {
std::vector<int> dimension = {classesDim};

- NDArray softmax = (*logits - logits->reduceAlongDims(reduce::Max, dimension, true)).transform(transform::Exp);
- softmax /= softmax.reduceAlongDims(reduce::Sum, dimension, true);
+ NDArray softmax = (*logits - logits->reduceAlongDimension(reduce::Max, dimension, true)).transform(transform::Exp);
+ softmax /= softmax.reduceAlongDimension(reduce::Sum, dimension, true);

// dEdp = softmax * sum_i(labels_i) - labels
- dLdp->assign(softmax * labels->reduceAlongDims(reduce::Sum, dimension, true) - *labels);
+ dLdp->assign(softmax * labels->reduceAlongDimension(reduce::Sum, dimension, true) - *labels);

// dEdl = -log(softmax)
- (-softmax).applyTransform(transform::Log, dLdl);
+ (-softmax).applyTransform(transform::Log, *dLdl);

return Status::OK();
}

@@ -50,9 +50,9 @@ CUSTOM_OP_IMPL(sparse_softmax_cross_entropy_loss_with_logits, 2, 1, false, 0, 0)
std::vector<int> dimension = {-1};

- auto maxAlongDim = logits->reduceAlongDims(reduce::Max, dimension, true);
+ auto maxAlongDim = logits->reduceAlongDimension(reduce::Max, dimension, true);
auto logitsExp = (*logits - maxAlongDim).transform(transform::Exp, nullptr);
- auto logSoftMax = -(( logitsExp / logitsExp.reduceAlongDims(reduce::Sum, dimension, true) ).transform(transform::Log));
+ auto logSoftMax = -(( logitsExp / logitsExp.reduceAlongDimension(reduce::Sum, dimension, true) ).transform(transform::Log));

helpers::scatterForLoss(block.launchContext(), *labels, logSoftMax, *output, false);

@@ -117,8 +117,8 @@ CUSTOM_OP_IMPL(sparse_softmax_cross_entropy_loss_with_logits_grad, 2, 1, false,
std::vector<int> dimension = {-1};

- NDArray softmax = (*logits - logits->reduceAlongDims(reduce::Max, dimension, true)).transform(transform::Exp);
- softmax /= softmax.reduceAlongDims(reduce::Sum, dimension, true);
+ NDArray softmax = (*logits - logits->reduceAlongDimension(reduce::Max, dimension, true)).transform(transform::Exp);
+ softmax /= softmax.reduceAlongDimension(reduce::Sum, dimension, true);

// dEdp = softmax - 1 (or 0)
dLdp->assign(softmax);

@@ -229,19 +229,19 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
// input - mean
NDArray xMinusMean(input); // empty array with same shape as input
- input->applyBroadcast(nd4j::broadcast::Subtract, axes, mean, &xMinusMean);
+ input->applyBroadcast(nd4j::broadcast::Subtract, axes, *mean, xMinusMean);

// stdInv
NDArray stdInv = *variance + epsilon;
- stdInv.applyTransform(transform::Reciprocal); // 1 / (variance + epsilon)
- stdInv.applyTransform(transform::Sqrt); // 1 / (variance + epsilon)^0.5
+ stdInv.applyTransform(transform::Reciprocal, stdInv); // 1 / (variance + epsilon)
+ stdInv.applyTransform(transform::Sqrt, stdInv); // 1 / (variance + epsilon)^0.5

// dvdm (use dLdM as storage for dvdm)
- xMinusMean.reduceAlongDimension(nd4j::reduce::Sum, dLdM, excludedAxes, keepUnitiesInShape);
+ xMinusMean.reduceAlongDimension(nd4j::reduce::Sum, *dLdM, excludedAxes, keepUnitiesInShape);
*dLdM *= -Ninv;

// g_sum
- auto gSum = dLdO->reduceAlongDims(nd4j::reduce::Sum, excludedAxes, keepUnitiesInShape);
+ auto gSum = dLdO->reduceAlongDimension(nd4j::reduce::Sum, excludedAxes, keepUnitiesInShape);

// dLdB
if(applyOffset)

@@ -249,11 +249,11 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
// stdInv * (g - g_sum/N) (use dLdI as storage for this expression)
gSum *= Ninv;
- dLdO->applyBroadcast(nd4j::broadcast::Subtract, axes, &gSum, dLdI);
- dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, &stdInv);
+ dLdO->applyBroadcast(nd4j::broadcast::Subtract, axes, gSum, *dLdI);
+ dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, stdInv, *dLdI);

// dLdV <- [g*(x - m)]_sum
- (xMinusMean * *dLdO).reduceAlongDimension(nd4j::reduce::Sum, dLdV, excludedAxes, keepUnitiesInShape);
+ (xMinusMean * *dLdO).reduceAlongDimension(nd4j::reduce::Sum, *dLdV, excludedAxes, keepUnitiesInShape);

// dLdG
*dLdV *= stdInv;

@@ -265,13 +265,13 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
*dLdV *= -Ninv; // -0.5f * (2 / N);

// dfdv * (dvdm + (x - m)) (use xMinusMean as storage for this expression)
- xMinusMean.applyBroadcast(nd4j::broadcast::Add, axes, dLdM);
- xMinusMean.applyBroadcast(nd4j::broadcast::Multiply, axes, dLdV);
+ xMinusMean.applyBroadcast(nd4j::broadcast::Add, axes, *dLdM, xMinusMean);
+ xMinusMean.applyBroadcast(nd4j::broadcast::Multiply, axes, *dLdV, xMinusMean);

// dLdI
*dLdI += xMinusMean;
if(applyScale)
- dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, gamma);
+ dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, *gamma, *dLdI);

*dLdM = 0; // put zeros so far
*dLdV = 0; // put zeros so far

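batchnorm_bp relies on the reworked `applyBroadcast`, where both the broadcast operand and the target are passed by reference and in-place use passes the array itself as the target. A minimal sketch under the same assumptions about includes and array construction:

```cpp
#include <NDArray.h>
#include <NDArrayFactory.h>

using namespace nd4j;

int main() {
    // 2x3 "input" and a length-3 mean vector broadcast along dimension 1
    auto input = NDArrayFactory::create<float>('c', {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    auto mean  = NDArrayFactory::create<float>('c', {3}, {2.5f, 3.5f, 4.5f});

    NDArray centered(input);                 // target with the same shape as input
    // broadcast operand and target are now references, no address-of operators
    input.applyBroadcast(broadcast::Subtract, {1}, mean, centered);

    centered.printIndexedBuffer("input - mean");
    return 0;
}
```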
@@ -240,7 +240,7 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {
if(gradB) {
if(gradB->rankOf() == 2)
gradB = new NDArray(gradB->reshape(gradB->ordering(), {(int)gradB->lengthOf()}));
- gradO->reduceAlongDimension(reduce::Sum, gradB, gradOaxesForDot); // sum over bS oD oH oW
+ gradO->reduceAlongDimension(reduce::Sum, *gradB, gradOaxesForDot); // sum over bS oD oH oW
if(gradB != OUTPUT_VARIABLE(2))
delete gradB;
}

@@ -234,7 +234,7 @@ CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {
if(gradB) {
if(gradB->rankOf() == 2)
gradB = new NDArray(gradB->reshape(gradB->ordering(), {gradB->lengthOf()}));
- gradO->reduceAlongDimension(reduce::Sum, gradB, {0, 2, 3}); // sum over bS, oH, oW
+ gradO->reduceAlongDimension(reduce::Sum, *gradB, {0, 2, 3}); // sum over bS, oH, oW
if(gradB != OUTPUT_VARIABLE(2))
delete gradB;
}

@@ -244,7 +244,7 @@ CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {
if(gradB) {
if(gradB->rankOf() == 2)
gradB = new NDArray(gradB->reshape(gradB->ordering(), {(int)gradB->lengthOf()}));
- gradO->reduceAlongDimension(reduce::Sum, gradB, {0, 2, 3, 4}); // sum over bS, oD, oH, oW
+ gradO->reduceAlongDimension(reduce::Sum, *gradB, {0, 2, 3, 4}); // sum over bS, oD, oH, oW
if(gradB != OUTPUT_VARIABLE(2))
delete gradB;
}

@@ -84,7 +84,7 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
epsilon = 0.001;

const int restSize = x->lengthOf() / iD;
- auto xAffected = NDArrayFactory::create(x->ordering(), {restSize, iD}, x->dataType(), block.launchContext());
+ auto xAffected = NDArrayFactory::create(x->ordering(), {restSize, iD}, mean->dataType(), block.launchContext());
xAffected.assign(x);

const int restSizeMinusOne = (restSize > 1) ? (restSize - 1) : 1;

@@ -93,7 +93,7 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
const double restSizeAdjust = (double)restSize / restSizeMinusOne;

if(isTraining) {
- auto sum = xAffected.reduceAlongDims(reduce::Sum, {0});
+ auto sum = xAffected.reduceAlongDimension(reduce::Sum, {0});
sum *= restSizeInv;
mean->assign(sum);
*batchMean = *mean;

@@ -106,8 +106,8 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
if(isTraining) {
int power = 2;
- xAffected.applyScalar(scalar::Pow, power);
- auto sum = xAffected.reduceAlongDims(reduce::Sum, {0});
+ xAffected.applyScalar(scalar::Pow, power, xAffected);
+ auto sum = xAffected.reduceAlongDimension(reduce::Sum, {0});
sum *= restSizeInv;
variance->assign(sum);
*batchVar = (*variance) * restSizeAdjust;

@@ -68,7 +68,7 @@ CONFIGURABLE_OP_IMPL(log_softmax_bp, 2, 1, true, 0, 0) {
helpers::softmax(block.launchContext(), *input, *gradI, dim);

- gradI->assign( *gradO - (*gradI * *gradO).reduceAlongDims(reduce::Sum, {dim}, true) );
+ gradI->assign( *gradO - (*gradI * *gradO).reduceAlongDimension(reduce::Sum, {dim}, true) );

return Status::OK();
}

@@ -46,7 +46,7 @@ namespace nd4j {
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;

auto xw = result->at(0);
- xw->applyScalar(nd4j::scalar::RELU, scalar, output);
+ xw->applyScalar(nd4j::scalar::RELU, scalar, *output);

return Status::OK();
}

@@ -62,7 +62,7 @@ CONFIGURABLE_OP_IMPL(softmax_bp, 2, 1, true, 0, 0) {
helpers::softmax(block.launchContext(), *input, *gradI, dim);

- auto sumAlongDim = (*gradI * *gradO).reduceAlongDims(reduce::Sum, {dim}, true);
+ auto sumAlongDim = (*gradI * *gradO).reduceAlongDimension(reduce::Sum, {dim}, true);
gradI->assign(*gradI * (*gradO - sumAlongDim));

return Status::OK();

@@ -56,7 +56,7 @@ CONFIGURABLE_OP_IMPL(adjust_contrast, 1, 1, true, 0, 0) {
axes[i] = i;

// mean as reduction for last dimension set
- auto mean = input->reduceAlongDims(reduce::Mean, axes);
+ auto mean = input->reduceAlongDimension(reduce::Mean, axes);

// this is contrast calculation
output->assign((*input - mean) * (*factor) + mean);

@@ -104,13 +104,13 @@ CONFIGURABLE_OP_IMPL(adjust_contrast_v2, 1, 1, true, 0, 0) {
std::vector<int> axes({1}); // dim 1 of pseudoresult

// mean as reduction for last dimension set over size (dim 1) of result3D
- auto mean = input3D.reduceAlongDims(reduce::Mean, axes);
+ auto mean = input3D.reduceAlongDimension(reduce::Mean, axes);

// result as (x - mean) * factor + mean
auto temp = input3D.ulike();
- input3D.applyBroadcast(broadcast::Subtract, {0, 2}, &mean, &temp, nullptr);
- temp.applyScalarArr(scalar::Multiply, factor);
- temp.applyBroadcast(broadcast::Add, {0, 2}, &mean, &output3D);
+ input3D.applyBroadcast(broadcast::Subtract, {0, 2}, mean, temp);
+ temp.applyScalarArr(scalar::Multiply, *factor, temp);
+ temp.applyBroadcast(broadcast::Add, {0, 2}, mean, output3D);
output->assign(output3D);
if(block.width() == 1)
delete factor;

@@ -44,11 +44,11 @@ namespace nd4j {
auto axisVector = INPUT_VARIABLE(1);
helpers::adjustAxis(input->rankOf(), axisVector, axis);

- input->applyIndexReduce(indexreduce::IndexMax, output, axis);
+ input->applyIndexReduce(indexreduce::IndexMax, *output, axis);
} else {
helpers::adjustAxis(input->rankOf(), axis);

- input->applyIndexReduce(indexreduce::IndexMax, output, axis);
+ input->applyIndexReduce(indexreduce::IndexMax, *output, axis);
}

STORE_RESULT(output);

@@ -44,11 +44,11 @@ namespace nd4j {
auto axisVector = INPUT_VARIABLE(1);
helpers::adjustAxis(input->rankOf(), axisVector, axis);

- input->applyIndexReduce(indexreduce::IndexMin, output, axis);
+ input->applyIndexReduce(indexreduce::IndexMin, *output, axis);
} else {
helpers::adjustAxis(input->rankOf(), axis);

- input->applyIndexReduce(indexreduce::IndexMin, output, axis);
+ input->applyIndexReduce(indexreduce::IndexMin, *output, axis);
}

STORE_RESULT(output);

@@ -82,7 +82,7 @@ CUSTOM_OP_IMPL(biasadd_bp, 3, 2, false, 0, 0) {
gradI->assign(gradO);

- gradO->reduceAlongDimension(nd4j::reduce::Sum, gradB, ShapeUtils::evalDimsToExclude(gradO->rankOf(), {channelDim}));
+ gradO->reduceAlongDimension(nd4j::reduce::Sum, *gradB, ShapeUtils::evalDimsToExclude(gradO->rankOf(), {channelDim}));

return ND4J_STATUS_OK;
}

@@ -45,7 +45,7 @@ CUSTOM_OP_IMPL(embedding_lookup, 2, 1, false, 0, 1) {
v = i++;
}

- std::unique_ptr<ResultSet> outputView(output->allTensorsAlongDimension(dims));
+ ResultSet outputView = output->allTensorsAlongDimension(dims);
REQUIRE_TRUE(block.width() > output->sizeAt(0), 0, "embedding_lookup: input list should be greater then %i, but %i given.",
output->sizeAt(0), block.width()
);

@@ -53,7 +53,7 @@ CUSTOM_OP_IMPL(embedding_lookup, 2, 1, false, 0, 1) {
Nd4jLong thisIndex = (*indeces).e<Nd4jLong>(e);
input = INPUT_VARIABLE(thisIndex); // lookup param

- outputView->at(e)->assign(input);
+ outputView.at(e)->assign(input);
}
}
else {

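embedding_lookup reflects the `allTensorsAlongDimension` signature change: the `ResultSet` is now returned by value, so the `unique_ptr` wrapper disappears while the individual views are still plain `NDArray` pointers. A hypothetical sketch follows; the include paths, the factory call and the loop bounds are assumptions for the example.

```cpp
#include <NDArray.h>
#include <NDArrayFactory.h>
#include <ResultSet.h>

using namespace nd4j;

int main() {
    auto out = NDArrayFactory::create<float>('c', {3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f});
    auto row = NDArrayFactory::create<float>('c', {2}, {1.f, 2.f});

    // the collection of views comes back by value: no unique_ptr, no delete
    ResultSet views = out.allTensorsAlongDimension({1});
    for (int e = 0; e < views.size(); e++)
        views.at(e)->assign(row);            // each view is still accessed as NDArray*

    out.printIndexedBuffer("filled output");
    return 0;
}
```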
@@ -49,8 +49,8 @@ namespace nd4j {
}

std::vector<int>& dims = axis;
- input->varianceAlongDimension(variance::SummaryStatsVariance, variances, false, axis);
- input->reduceAlongDimension(reduce::Mean, means, axis, keepDims);
+ input->varianceAlongDimension(variance::SummaryStatsVariance, *variances, false, axis);
+ input->reduceAlongDimension(reduce::Mean, *means, axis, keepDims);

return Status::OK();
}

@@ -52,31 +52,31 @@ namespace nd4j {
case 0: {
REQUIRE_TRUE(dims.size() == 2 || (input->rankOf() == 2 && dims.size() == 0), 0, "Norm: Frobenius is defined for 2D matrices or TADS only");
// fro
- input->reduceAlongDimension(reduce::NormFrobenius, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::NormFrobenius, *output, dims, false, output->rankOf() == 2);
}
break;
case 1: {
// euclidean
if ((input->rankOf() == 2 && dims.size() == 0) || dims.size() == 2) {
- input->reduceAlongDimension(reduce::NormFrobenius, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::NormFrobenius, *output, dims, false, output->rankOf() == 2);
} else {
- input->reduceAlongDimension(reduce::Norm2, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::Norm2, *output, dims, false, output->rankOf() == 2);
}
}
break;
case 2: {
// 1
- input->reduceAlongDimension(reduce::Norm1, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::Norm1, *output, dims, false, output->rankOf() == 2);
}
break;
case 3: {
// 2
- input->reduceAlongDimension(reduce::Norm2, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::Norm2, *output, dims, false, output->rankOf() == 2);
}
break;
case 4: {
// inf-norm
- input->reduceAlongDimension(reduce::NormMax, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::NormMax, *output, dims, false, output->rankOf() == 2);
}
break;
default: {

@@ -84,7 +84,7 @@ namespace nd4j {
REQUIRE_TRUE(block.getIArguments()->size() > 1, 0, "P-Norm reductions requires 2 TArguments, but only 1 was provided");
// FIXME: p is required here
//T p = T_ARG(1);
- input->reduceAlongDimension(reduce::NormP, output, dims, false, output->rankOf() == 2);
+ input->reduceAlongDimension(reduce::NormP, *output, dims, false, output->rankOf() == 2);
}
}

@@ -40,23 +40,20 @@ namespace nd4j {
shift.assign(T_ARG(0));
}

- means->applyScalarArr(scalar::Divide, counts, resMeans, nullptr);
+ means->applyScalarArr(scalar::Divide, *counts, *resMeans);

- NDArray* squareMeans = resMeans->dup('c');
- NDArray* tempVariances = resVariances->dup('c');
+ NDArray squareMeans = resMeans->dup('c');
+ NDArray tempVariances = resVariances->dup('c');

- squareMeans->applyTransform(transform::Square, squareMeans, nullptr);
- variances->applyScalarArr(scalar::Divide, counts, tempVariances, nullptr);
- // tempVariances->printIndexedBuffer("varianced divided by count");
- tempVariances->applyPairwiseTransform(pairwise::Subtract, squareMeans, resVariances, nullptr);
+ squareMeans.applyTransform(transform::Square, squareMeans, nullptr);
+ variances->applyScalarArr(scalar::Divide, *counts, tempVariances);
+ // tempVariances.printIndexedBuffer("varianced divided by count");
+ tempVariances.applyPairwiseTransform(pairwise::Subtract, squareMeans, *resVariances);

if (shift.e<double>(0) != 0) {
- resMeans->applyScalarArr(scalar::Add, &shift, resMeans, nullptr);
+ resMeans->applyScalarArr(scalar::Add, shift, *resMeans);
}

- delete squareMeans;
- delete tempVariances;

return Status::OK();
}

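normalize_moments picks up the corrected `NDArray::dup`, which now returns the copy by value, so the manual `delete` calls above disappear. A short hedged sketch (includes and factory call assumed):

```cpp
#include <NDArray.h>
#include <NDArrayFactory.h>

using namespace nd4j;

int main() {
    auto means = NDArrayFactory::create<float>('c', {3}, {1.f, 2.f, 3.f});

    // dup() returns an NDArray by value: no raw pointer, no manual delete
    NDArray squared = means.dup('c');
    squared.applyTransform(transform::Square, squared);   // square each element in place

    squared.printIndexedBuffer("means^2");
    return 0;
}
```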
@@ -47,7 +47,7 @@ CUSTOM_OP_IMPL(reduce_mean, 1, 1, false, 0, 0) {
for(const auto& item : dimensions)
REQUIRE_TRUE(item >= -input->rankOf() && item < input->rankOf(), 0, "REDUCE_MEAN OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , input->rankOf(), input->rankOf(), item);

- input->reduceAlongDimension(reduce::Mean, output, dimensions, keepDims);
+ input->reduceAlongDimension(reduce::Mean, *output, dimensions, keepDims);

return Status::OK();
}

@@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(reduce_stdev, 1, 1, false, 0, 0) {
for(const auto& item : dimensions)
REQUIRE_TRUE(item >= -input->rankOf() && item < input->rankOf(), 0, "REDUCE_STDEV OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , input->rankOf(), input->rankOf(), item);

- input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, output, biasCorrected, dimensions);
+ input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, *output, biasCorrected, dimensions);

return Status::OK();
}

@@ -130,10 +130,10 @@ CUSTOM_OP_IMPL(reduce_stdev_bp, 2, 1, false, 0, 0) {
const Nd4jLong N = input->lengthOf() / gradO->lengthOf();
const Nd4jLong NminusOne = biasCorrected ? N - 1 : N;

- auto mean = input->reduceAlongDims(reduce::Mean, dimensions, true);
+ auto mean = input->reduceAlongDimension(reduce::Mean, dimensions, true);

NDArray variance(mean.getShapeInfo(), true, block.launchContext()); // create empty array with shape matching shape of mean array
- input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, &variance, biasCorrected, dimensions);
+ input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, variance, biasCorrected, dimensions);

gradI->assign( (*input - mean) / (variance * NminusOne)); // automatic broadcasting happens here

@@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(reduce_variance, 1, 1, false, 0, 0) {
for(const auto& item : dimensions)
REQUIRE_TRUE(item >= -input->rankOf() && item < input->rankOf(), 0, "REDUCE_VARIANCE OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , input->rankOf(), input->rankOf(), item);

- input->varianceAlongDimension(variance::SummaryStatsVariance, output, biasCorrected, dimensions);
+ input->varianceAlongDimension(variance::SummaryStatsVariance, *output, biasCorrected, dimensions);

return Status::OK();
}

@@ -129,7 +129,7 @@ CUSTOM_OP_IMPL(reduce_variance_bp, 2, 1, false, 0, 0) {
const double factor1 = 2.0 / NminusOne;
const double factor2 = 2.0 / (N * NminusOne);

- auto mean = input->reduceAlongDims(reduce::Mean, dimensions, true);
+ auto mean = input->reduceAlongDimension(reduce::Mean, dimensions, true);

gradI->assign( (*input - mean) * (2.0f / NminusOne)); // automatic broadcasting happens here

@@ -45,9 +45,9 @@ namespace ops {
//void* whereMax = (void*)();
auto internal = (*input);
internal -= maxVals;
- internal.applyTransform(transform::Exp, nullptr, nullptr);
- internal.reduceAlongDimension(reduce::Sum, output, axes, keepDims, false); //, (void*)&maxVals);
- output->applyTransform(transform::Log, nullptr, nullptr);
+ internal.applyTransform(transform::Exp, internal);
+ internal.reduceAlongDimension(reduce::Sum, *output, axes, keepDims, false); //, (void*)&maxVals);
+ output->applyTransform(transform::Log, *output);
(*output) += maxVals;
return ND4J_STATUS_OK;
}

@@ -52,7 +52,7 @@ CUSTOM_OP_IMPL(reduce_max, 1, 1, false, 0, 0) {
else if (block.getTArguments()->size() > 0)
keepDims = (bool)T_ARG(0);

- input->reduceAlongDimension(reduce::Max, output, dimensions, keepDims);
+ input->reduceAlongDimension(reduce::Max, *output, dimensions, keepDims);

return Status::OK();
}

@@ -122,8 +122,7 @@ CUSTOM_OP_IMPL(reduce_max_bp, 2, 1, false, 0, 0) {
else {

auto indicesArr = input->applyIndexReduce(nd4j::indexreduce::IndexMax, dimensions);
- helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, *indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
- delete indicesArr;
+ helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
}

return Status::OK();

@@ -52,7 +52,7 @@ CUSTOM_OP_IMPL(reduce_min, 1, 1, false, 0, 0) {
else if (block.getTArguments()->size() > 0)
keepDims = (bool)T_ARG(0);

- input->reduceAlongDimension(reduce::Min, output, dimensions, keepDims);
+ input->reduceAlongDimension(reduce::Min, *output, dimensions, keepDims);

return Status::OK();
}

@@ -125,8 +125,7 @@ CUSTOM_OP_IMPL(reduce_min_bp, 2, 1, false, 0, 0) {
else {

auto indicesArr = input->applyIndexReduce(nd4j::indexreduce::IndexMin, dimensions);
- helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, *indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
- delete indicesArr;
+ helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
}

return Status::OK();
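reduce_max_bp and reduce_min_bp use the corrected `applyIndexReduce`, which now returns the index array by value and removes the explicit `delete`. A final illustrative sketch under the same assumptions about includes, the factory call and the sample data:

```cpp
#include <NDArray.h>
#include <NDArrayFactory.h>

#include <vector>

using namespace nd4j;

int main() {
    auto x = NDArrayFactory::create<float>('c', {2, 3}, {3.f, 1.f, 2.f, 0.f, 5.f, 4.f});

    // applyIndexReduce hands the index array back by value; the old code had to
    // dereference a pointer and delete it afterwards
    std::vector<int> dims = {1};
    NDArray argMax = x.applyIndexReduce(indexreduce::IndexMax, dims);

    argMax.printIndexedBuffer("argmax along dim 1");   // expected indices: 0 and 1
    return 0;
}
```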