Shyrma temp (#131)

* - specifying template instantiation for certain types in float16 and bfloat16

Signed-off-by: Yurii <iuriish@yahoo.com>

* - polishing bfloat16 and float16 member functions template specialization

Signed-off-by: Yurii <iuriish@yahoo.com>

* - rewrite and overload array +-*/ scalar and scalar +-*/ array in NDArray class

Signed-off-by: Yurii <iuriish@yahoo.com>

* - make corrections related to rvalue and lvalue conversions

Signed-off-by: Yurii <iuriish@yahoo.com>

* - provide move semantics in NDArray operators array +-*/ array

Signed-off-by: Yurii <iuriish@yahoo.com>

* float16/bfloat16 tweaks

Signed-off-by: raver119 <raver119@gmail.com>

* one more tweak

Signed-off-by: raver119 <raver119@gmail.com>

* - make float16 and bfloat16 compile successfully on cuda

Signed-off-by: Yurii <iuriish@yahoo.com>

* - do not use resources of view-like arrays when move semantics is applied

Signed-off-by: Yurii <iuriish@yahoo.com>

* - get rid of pointers in signatures of NDArray methods 1

Signed-off-by: Yurii <iuriish@yahoo.com>

* - correction of signature of NDArray::dup method

Signed-off-by: Yurii <iuriish@yahoo.com>

* - correction of signature of NDArray::reduceAlongDimension method

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyIndexReduce and applyTrueBroadcast methods

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyReduce3 and varianceAlongDimension methods

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::tensorsAlongDimension and diagonal methods

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::allTensorsAlongDimension

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::reduceAlongDimension 2

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyTransform 2

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyPairwiseTransform 2

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyBroadcast 2

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyTrueBroadcast 2

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::applyScalar and applyScalarArr

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::lambda methods

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::reduce3 methods 2

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of following NDArray methods: add/sub/mul/div row/column and fillAsTriangular

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::tileToShape methods

Signed-off-by: Yurii <iuriish@yahoo.com>

* - signature correction of NDArray::isShapeSameStrict method

Signed-off-by: Yurii <iuriish@yahoo.com>

* minor corrections in tests

Signed-off-by: Yurii <iuriish@yahoo.com>

* - replace reduce op in batchnorm mkldnn

Signed-off-by: Yurii <iuriish@yahoo.com>

* - add explicit template instantiations for operator+(NDArray&&, const scalar)

Signed-off-by: Yurii <iuriish@yahoo.com>

* - corrections of casts in float16/bfloat16

Signed-off-by: Yurii <iuriish@yahoo.com>

* - provide move semantics in following NDArray methods: transform, applyTrueBroadcast, transpose, reshape, permute

Signed-off-by: Yurii <iuriish@yahoo.com>

* - get rid of input array A duplicate in svd cuda op

Signed-off-by: Yurii <iuriish@yahoo.com>

* - work around existing bug in svd cuda API

Signed-off-by: Yurii <iuriish@yahoo.com>

* - add temporary global memory buffer in svd cuda when calcUV = false and m != n

Signed-off-by: Yurii <iuriish@yahoo.com>

* - remove test with bfloat16 type for betainc

Signed-off-by: Yurii <iuriish@yahoo.com>

* - resolve conflicts after master has been merged in

Signed-off-by: Yurii <iuriish@yahoo.com>

* - changed type of affected input array in fused_batch_norm

Signed-off-by: Yurii <iuriish@yahoo.com>

* - add several explicit type castings

Signed-off-by: Yurii <iuriish@yahoo.com>

* - add ND4J_EXPORT to operators

Signed-off-by: Yurii <iuriish@yahoo.com>

* - add explicit template types in instantiations of template arithm operators of NDArray class

Signed-off-by: Yurii <iuriish@yahoo.com>

* - one more test fix

Signed-off-by: Yurii <iuriish@yahoo.com>

Co-authored-by: raver119 <raver119@gmail.com>
master
Yurii Shyrma 2019-12-20 21:35:39 +02:00 committed by raver119
parent 3e0afadea1
commit 5d9b2a16e5
237 changed files with 5235 additions and 6513 deletions


@ -46,26 +46,53 @@
namespace nd4j {
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator+(const NDArray& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator+(NDArray&& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator+(const T& scalar, const NDArray& arr);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator+(const T& scalar, NDArray&& arr);
ND4J_EXPORT NDArray operator-(const float&, const NDArray&);
ND4J_EXPORT NDArray operator-(const float16&, const NDArray&);
ND4J_EXPORT NDArray operator-(const double&, const NDArray&);
ND4J_EXPORT NDArray operator-(const int&, const NDArray&);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator-(const NDArray& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator-(NDArray&& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator-(const T& scalar, const NDArray& arr);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator-(const T& scalar, NDArray&& arr);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator*(const NDArray& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator*(NDArray&& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator*(const T& scalar, const NDArray& arr);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator*(const T& scalar, NDArray&& arr);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator/(const NDArray& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator/(NDArray&& arr, const T& scalar);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator/(const T& scalar, const NDArray& arr);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
ND4J_EXPORT NDArray operator/(const T& scalar, NDArray&& arr);
template <typename T1, typename T2, typename = typename std::enable_if<std::is_same<NDArray, typename std::decay<T1>::type>::value && std::is_same<NDArray, typename std::decay<T2>::type>::value>::type>
ND4J_EXPORT NDArray operator+(T1&& arr1, T2&& arr2);
template <typename T1, typename T2, typename = typename std::enable_if<std::is_same<NDArray, typename std::decay<T1>::type>::value && std::is_same<NDArray, typename std::decay<T2>::type>::value>::type>
ND4J_EXPORT NDArray operator-(T1&& arr1, T2&& arr2);
template <typename T1, typename T2, typename = typename std::enable_if<std::is_same<NDArray, typename std::decay<T1>::type>::value && std::is_same<NDArray, typename std::decay<T2>::type>::value>::type>
ND4J_EXPORT NDArray operator*(T1&& arr1, T2&& arr2);
template <typename T1, typename T2, typename = typename std::enable_if<std::is_same<NDArray, typename std::decay<T1>::type>::value && std::is_same<NDArray, typename std::decay<T2>::type>::value>::type>
ND4J_EXPORT NDArray operator/(T1&& arr1, T2&& arr2);
ND4J_EXPORT NDArray operator+(const float&, const NDArray&);
ND4J_EXPORT NDArray operator+(const float16&, const NDArray&);
ND4J_EXPORT NDArray operator+(const double&, const NDArray&);
ND4J_EXPORT NDArray operator+(const int&, const NDArray&);
ND4J_EXPORT NDArray operator*(const float&, const NDArray&);
ND4J_EXPORT NDArray operator*(const float16&, const NDArray&);
ND4J_EXPORT NDArray operator*(const double&, const NDArray&);
ND4J_EXPORT NDArray operator*(const int&, const NDArray&);
ND4J_EXPORT NDArray operator/(const float&, const NDArray&);
ND4J_EXPORT NDArray operator/(const float16&, const NDArray&);
ND4J_EXPORT NDArray operator/(const double&, const NDArray&);
ND4J_EXPORT NDArray operator/(const int&, const NDArray&);
ND4J_EXPORT NDArray mmul(const NDArray&, const NDArray&);
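
A minimal usage sketch of the scalar operators declared above (illustrative, not part of this diff); the include paths and the NDArrayFactory::create call are assumed rather than taken from this changeset. Lvalue overloads allocate a fresh result, while the rvalue overloads may reuse the temporary's buffer unless it is a view.

    #include <NDArray.h>
    #include <NDArrayFactory.h>
    #include <utility>

    using namespace nd4j;

    void scalarOperatorSketch() {
        auto a = NDArrayFactory::create<float>('c', {2, 3});  // assumed factory helper
        a.assign(1.f);                                        // templated assign introduced in this PR

        NDArray b = a + 2.f;              // operator+(const NDArray&, const T&): new buffer
        NDArray c = std::move(a) * 0.5f;  // operator*(NDArray&&, const T&): may reuse a's buffer
        NDArray d = 3.0 - b;              // operator-(const T&, const NDArray&)
    }
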
@ -323,7 +350,7 @@ namespace nd4j {
* axis - axis along which to repeat elements
* repeats - number of repetitions
*/
NDArray* repeat(const int axis, const std::vector<int>& repeats) const;
NDArray repeat(const int axis, const std::vector<int>& repeats) const;
/**
* This method fills this array with zeros
@ -336,15 +363,7 @@ namespace nd4j {
* @param array
* @return
*/
static NDArray quantize(NDArray &array);
/**
* This method returns quantized copy of given array
*
* @param array
* @return
*/
static NDArray* quantize(NDArray *array);
static NDArray quantize(const NDArray &array);
/**
* fill target array by repeating current array
@ -356,19 +375,16 @@ namespace nd4j {
/**
* creates array which points on certain sub-range of this array, sub-range is defined by given indices
*/
NDArray* subarray(IndicesList& indices) const;
NDArray* subarray(const std::initializer_list<NDIndex*>& idx) const;
NDArray* subarray(const Intervals& idx) const;
NDArray subarray(IndicesList& indices) const;
NDArray subarray(const std::initializer_list<NDIndex*>& idx) const;
NDArray subarray(const Intervals& idx) const;
/**
* cast array elements to given dtype
*/
template <typename T>
NDArray* cast();
NDArray cast(DataType dtype) const;
NDArray* cast(DataType dtype) const;
void cast(NDArray* target, DataType dtype);
void cast(NDArray& target, DataType dtype);
/**
* returns _context
@ -455,16 +471,22 @@ namespace nd4j {
/**
* permutes the dimensions in array according to "dimensions" array, new array points on _buffer of this array
*/
NDArray permute(const std::initializer_list<int>& dimensions) const;
NDArray permute(const std::vector<int>& dimensions) const;
NDArray permute(const int* dimensions, const int rank) const;
NDArray permute(const std::initializer_list<int>& dimensions) const &;
NDArray permute(const std::vector<int>& dimensions) const &;
NDArray permute(const int* dimensions, const int rank) const &;
NDArray permute(const std::initializer_list<int>& dimensions) &&;
NDArray permute(const std::vector<int>& dimensions) &&;
NDArray permute(const int* dimensions, const int rank) &&;
void permute(const int* dimensions, const int rank, NDArray& target) const;
void permute(const std::vector<int>& dimensions, NDArray& target) const;
NDArray permute(const std::initializer_list<Nd4jLong>& dimensions) const;
NDArray permute(const std::vector<Nd4jLong>& dimensions) const;
NDArray permute(const Nd4jLong* dimensions, const int rank) const;
NDArray permute(const std::initializer_list<Nd4jLong>& dimensions) const &;
NDArray permute(const std::vector<Nd4jLong>& dimensions) const &;
NDArray permute(const Nd4jLong* dimensions, const int rank) const &;
NDArray permute(const std::initializer_list<Nd4jLong>& dimensions) &&;
NDArray permute(const std::vector<Nd4jLong>& dimensions) &&;
NDArray permute(const Nd4jLong* dimensions, const int rank) &&;
void permute(const Nd4jLong* dimensions, const int rank, NDArray& target) const;
void permute(const std::vector<Nd4jLong>& dimensions, NDArray& target) const;
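
The paired const & / && qualifiers above let permute on a temporary hand its buffer to the result instead of copying. A small sketch (not part of the diff; the factory call is assumed):

    #include <NDArrayFactory.h>
    #include <utility>

    using namespace nd4j;

    void permuteSketch() {
        auto x = NDArrayFactory::create<float>('c', {2, 3, 4});

        NDArray y = x.permute({2, 0, 1});             // const & overload: x remains usable
        NDArray z = std::move(x).permute({1, 2, 0});  // && overload: result may take over x's buffer
    }
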
@ -522,24 +544,13 @@ namespace nd4j {
/**
* this method assigns given value to all elements in array
*/
void assign(const double value, bool allowParallelism = true);
void assign(const float value, bool allowParallelism = true);
void assign(const float16 value, bool allowParallelism = true);
void assign(const bfloat16& value, bool allowParallelism = true);
void assign(const Nd4jLong value, bool allowParallelism = true);
void assign(const int value, bool allowParallelism = true);
void assign(const int16_t value, bool allowParallelism = true);
void assign(const uint8_t value, bool allowParallelism = true);
void assign(const uint16_t value, bool allowParallelism = true);
void assign(const uint32_t value, bool allowParallelism = true);
void assign(const uint64_t value, bool allowParallelism = true);
void assign(const int8_t value, bool allowParallelism = true);
void assign(const bool value, bool allowParallelism = true);
template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
void assign(const T& value, bool allowParallelism = true);
/**
* returns new copy of this array, optionally in different order
*/
NDArray *dup(const char newOrder = 'a') const;
NDArray dup(const char newOrder = 'a') const;
/**
* returns sum of all elements of array
@ -566,21 +577,17 @@ namespace nd4j {
* keepDims - if true then put unities in place of reduced dimensions
*/
NDArray* reduceAlongDimension(nd4j::reduce::FloatOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::FloatOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDims(nd4j::reduce::FloatOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::FloatOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::FloatOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::SameOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::SameOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDims(nd4j::reduce::SameOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::SameOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::SameOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::BoolOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::BoolOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDims(nd4j::reduce::BoolOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::BoolOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::BoolOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::LongOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray* reduceAlongDimension(nd4j::reduce::LongOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDims(nd4j::reduce::LongOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::LongOps op, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
NDArray reduceAlongDimension(nd4j::reduce::LongOps op, const std::initializer_list<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false) const;
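
With the pointer-returning overloads removed, a reduction now comes back by value and needs no explicit delete. Sketch only; the factory call and the reduce op enum names (Mean, Sum) are assumed, not taken from this diff:

    #include <NDArrayFactory.h>

    using namespace nd4j;

    void reduceSketch() {
        auto x = NDArrayFactory::create<float>('c', {4, 5});

        NDArray mean = x.reduceAlongDimension(reduce::Mean, {1});       // FloatOps overload
        NDArray sums = x.reduceAlongDimension(reduce::Sum, {0}, true);  // SameOps overload, keepDims = true
    }
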
/**
* method reduces array by excluding its shapes along dimensions present in given dimensions vector
@ -589,10 +596,10 @@ namespace nd4j {
* keepDims - if true then put unities in place of reduced dimensions
* extras - extra parameters
*/
void reduceAlongDimension(nd4j::reduce::FloatOps op, NDArray* target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::SameOps op, NDArray* target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::BoolOps op, NDArray* target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::LongOps op, NDArray* target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::FloatOps op, NDArray& target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::SameOps op, NDArray& target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::BoolOps op, NDArray& target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
void reduceAlongDimension(nd4j::reduce::LongOps op, NDArray& target, const std::vector<int>& dimensions, const bool keepDims = false, const bool supportOldShapes = false, const bool checkTargetShape = true) const;
/**
* return variance of array elements set
@ -631,20 +638,24 @@ namespace nd4j {
void makeBothActual() const { syncToDevice(); syncToHost(); }
void applyTransform(nd4j::transform::FloatOps op, NDArray *target = nullptr, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::SameOps op, NDArray *target = nullptr, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::AnyOps op, NDArray *target = nullptr, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::BoolOps op, NDArray *target = nullptr, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::StrictOps op, NDArray *target = nullptr, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::FloatOps op, NDArray& target, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::SameOps op, NDArray& target, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::AnyOps op, NDArray& target, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::BoolOps op, NDArray& target, ExtraArguments *extraParams = nullptr);
void applyTransform(nd4j::transform::StrictOps op, NDArray& target, ExtraArguments *extraParams = nullptr);
/**
* apply OpName transformation to this array and store result in new array to be returned
* extraParams - extra parameters for operation
*/
NDArray transform(nd4j::transform::FloatOps op, void *extraParams = nullptr) const;
NDArray transform(nd4j::transform::SameOps op, void *extraParams = nullptr) const;
NDArray transform(nd4j::transform::BoolOps op, void *extraParams = nullptr) const;
NDArray transform(nd4j::transform::StrictOps op, void *extraParams = nullptr) const;
NDArray transform(nd4j::transform::FloatOps op, void *extraParams = nullptr) const &;
NDArray transform(nd4j::transform::SameOps op, void *extraParams = nullptr) const &;
NDArray transform(nd4j::transform::BoolOps op, void *extraParams = nullptr) const &;
NDArray transform(nd4j::transform::StrictOps op, void *extraParams = nullptr) const &;
NDArray transform(nd4j::transform::FloatOps op, void *extraParams = nullptr) &&;
NDArray transform(nd4j::transform::SameOps op, void *extraParams = nullptr) &&;
NDArray transform(nd4j::transform::BoolOps op, void *extraParams = nullptr) &&;
NDArray transform(nd4j::transform::StrictOps op, void *extraParams = nullptr) &&;
/**
* apply pairwise OpName transformation based on "this" and "other" arras elements, store result in this array
@ -659,11 +670,11 @@ namespace nd4j {
* target - where to store result
* extraParams - extra parameters for operation
*/
void applyPairwiseTransform(nd4j::pairwise::Ops op, const NDArray *other, NDArray *target, ExtraArguments *extraParams = nullptr) const;
void applyPairwiseTransform(nd4j::pairwise::Ops op, const NDArray& other, NDArray& target, ExtraArguments *extraParams = nullptr) const;
void applyPairwiseTransform(nd4j::pairwise::BoolOps op, const NDArray *other, NDArray *target, ExtraArguments *extraParams = nullptr) const;
void applyPairwiseTransform(nd4j::pairwise::BoolOps op, const NDArray& other, NDArray& target, ExtraArguments *extraParams = nullptr) const;
void applyPairwiseTransform(nd4j::pairwise::IntOps op, const NDArray *other, NDArray *target, ExtraArguments *extraParams = nullptr) const;
void applyPairwiseTransform(nd4j::pairwise::IntOps op, const NDArray& other, NDArray&target, ExtraArguments *extraParams = nullptr) const;
/**
* apply operation which requires broadcasting, broadcast a smaller array (tad) along bigger one (this)
@ -672,23 +683,23 @@ namespace nd4j {
* target - where to store result
* extraParams - extra parameters for operation
*/
void applyBroadcast(nd4j::broadcast::Ops op, const std::initializer_list<int> dimensions, const NDArray* tad, NDArray* target = nullptr, ExtraArguments* extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::Ops op, const std::initializer_list<int> dimensions, const NDArray& tad, NDArray& target, ExtraArguments* extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::Ops op, const std::vector<int> &dimensions, const NDArray *tad, NDArray *target = nullptr, ExtraArguments *extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::Ops op, const std::vector<int> &dimensions, const NDArray &tad, NDArray &target, ExtraArguments *extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::BoolOps op, const std::vector<int> &dimensions, const NDArray *tad, NDArray *target = nullptr, ExtraArguments *extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::IntOps op, const std::vector<int> &dimensions, const NDArray *tad, NDArray *target = nullptr, ExtraArguments *extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::BoolOps op, const std::vector<int> &dimensions, const NDArray &tad, NDArray &target, ExtraArguments *extraArgs = nullptr);
void applyBroadcast(nd4j::broadcast::IntOps op, const std::vector<int> &dimensions, const NDArray& tad, NDArray &target, ExtraArguments *extraArgs = nullptr);
/**
* apply operation which requires broadcasting, broadcast one tensor along another, also this method checks the possibility of broadcasting
* other - input array
* extraParams - extra parameters for operation
*/
NDArray applyTrueBroadcast(nd4j::BroadcastOpsTuple op, const NDArray& other, ExtraArguments *extraArgs = nullptr) const;
NDArray* applyTrueBroadcast(nd4j::BroadcastOpsTuple op, const NDArray* other, ExtraArguments *extraArgs = nullptr) const;
NDArray applyTrueBroadcast(nd4j::BroadcastOpsTuple op, const NDArray& other, ExtraArguments *extraArgs = nullptr) const &;
NDArray applyTrueBroadcast(nd4j::BroadcastOpsTuple op, NDArray&& other, ExtraArguments *extraArgs = nullptr) const &;
NDArray applyTrueBroadcast(nd4j::BroadcastOpsTuple op, NDArray&& other, ExtraArguments *extraArgs = nullptr) &&;
NDArray applyTrueBroadcast(nd4j::BroadcastOpsTuple op, const NDArray& other, ExtraArguments *extraArgs = nullptr) &&;
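
A sketch of the ref-qualified applyTrueBroadcast overloads above (illustrative only; the factory calls and the BroadcastOpsTuple::Add() helper are assumptions, not taken from this diff). When an operand is an rvalue whose shape already matches the broadcast result, its buffer can be reused:

    #include <NDArrayFactory.h>
    #include <utility>

    using namespace nd4j;

    void broadcastSketch() {
        auto a = NDArrayFactory::create<float>('c', {2, 3});
        auto b = NDArrayFactory::create<float>('c', {1, 3});

        NDArray c = a.applyTrueBroadcast(BroadcastOpsTuple::Add(), b);                        // const & this, const & other
        NDArray d = std::move(a).applyTrueBroadcast(BroadcastOpsTuple::Add(), std::move(b));  // && this, && other
    }
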
/**
* apply operation which requires broadcasting, broadcast one tensor along another, also this method checks the possibility of broadcasting
@ -697,11 +708,11 @@ namespace nd4j {
* checkTargetShape - if true check whether target shape is suitable for broadcasting
* extraParams - extra parameters for operation
*/
void applyTrueBroadcast(nd4j::BroadcastOpsTuple op, const NDArray* other, NDArray* target, const bool checkTargetShape = true, ExtraArguments *extraArgs = nullptr) const;
void applyTrueBroadcast(nd4j::BroadcastOpsTuple op, const NDArray& other, NDArray& target, const bool checkTargetShape = true, ExtraArguments *extraArgs = nullptr) const;
void applyTrueBroadcast(nd4j::BroadcastBoolOpsTuple op, const NDArray* other, NDArray* target, const bool checkTargetShape = true, ExtraArguments *extraArgs = nullptr) const;
void applyTrueBroadcast(nd4j::BroadcastBoolOpsTuple op, const NDArray& other, NDArray& target, const bool checkTargetShape = true, ExtraArguments *extraArgs = nullptr) const;
void applyTrueBroadcast(nd4j::BroadcastIntOpsTuple op, const NDArray* other, NDArray* target, const bool checkTargetShape = true, ExtraArguments *extraArgs = nullptr) const;
void applyTrueBroadcast(nd4j::BroadcastIntOpsTuple op, const NDArray& other, NDArray& target, const bool checkTargetShape = true, ExtraArguments *extraArgs = nullptr) const;
/**
@ -711,13 +722,13 @@ namespace nd4j {
* extraParams - extra parameters for operation
*/
template <typename T>
void applyScalar(nd4j::scalar::Ops op, const T scalar, NDArray* target = nullptr, ExtraArguments *extraParams = nullptr);
void applyScalar(nd4j::scalar::Ops op, const T scalar, NDArray& target, ExtraArguments *extraParams = nullptr);
template <typename T>
void applyScalar(nd4j::scalar::BoolOps op, const T scalar, NDArray* target, ExtraArguments *extraParams = nullptr) const;
void applyScalar(nd4j::scalar::BoolOps op, const T scalar, NDArray& target, ExtraArguments *extraParams = nullptr) const;
template <typename T>
void applyScalar(nd4j::scalar::IntOps op, const T scalar, NDArray* target, ExtraArguments *extraParams = nullptr) const;
void applyScalar(nd4j::scalar::IntOps op, const T scalar, NDArray& target, ExtraArguments *extraParams = nullptr) const;
/**
* apply a scalar operation to an array
@ -725,27 +736,27 @@ namespace nd4j {
* target - where to store result
* extraParams - extra parameters for operation
*/
void applyScalarArr(nd4j::scalar::Ops op, const NDArray* scalar, NDArray* target = nullptr, ExtraArguments *extraParams = nullptr);
void applyScalarArr(nd4j::scalar::Ops op, const NDArray& scalar, NDArray& target, ExtraArguments *extraParams = nullptr);
void applyScalarArr(nd4j::scalar::BoolOps op, const NDArray* scalar, NDArray* target, ExtraArguments *extraParams = nullptr) const;
void applyScalarArr(nd4j::scalar::BoolOps op, const NDArray& scalar, NDArray& target, ExtraArguments *extraParams = nullptr) const;
void applyScalarArr(nd4j::scalar::IntOps op, const NDArray* scalar, NDArray* target, ExtraArguments *extraParams = nullptr) const;
void applyScalarArr(nd4j::scalar::IntOps op, const NDArray& scalar, NDArray& target, ExtraArguments *extraParams = nullptr) const;
#if defined(__CUDABLAS__) //&& defined(BUILD_TESTS)
template <typename Lambda>
FORCEINLINE void applyLambda(Lambda func, NDArray* target = nullptr);
FORCEINLINE void applyLambda(Lambda func, NDArray& target);
template <typename Lambda>
FORCEINLINE void applyPairwiseLambda(const NDArray* other, Lambda func, NDArray* target = nullptr);
FORCEINLINE void applyPairwiseLambda(const NDArray& other, Lambda func, NDArray& target);
template <typename Lambda>
FORCEINLINE void applyIndexedLambda(Lambda func, NDArray* target = nullptr);
FORCEINLINE void applyIndexedLambda(Lambda func, NDArray& target);
template <typename Lambda>
FORCEINLINE void applyIndexedPairwiseLambda(NDArray* other, Lambda func, NDArray* target = nullptr);
FORCEINLINE void applyIndexedPairwiseLambda(NDArray& other, Lambda func, NDArray& target);
template <typename Lambda>
FORCEINLINE void applyTriplewiseLambda(NDArray* second, NDArray *third, Lambda func, NDArray* target = nullptr);
FORCEINLINE void applyTriplewiseLambda(NDArray& second, NDArray& third, Lambda func, NDArray& target);
#else
/**
@ -754,7 +765,7 @@ namespace nd4j {
* target - where to store result
*/
template <typename T>
void applyLambda(const std::function<T(T)>& func, NDArray* target = nullptr);
void applyLambda(const std::function<T(T)>& func, NDArray& target);
/**
* apply pairwise operation "func" to an array
@ -763,16 +774,16 @@ namespace nd4j {
* target - where to store result
*/
template <typename T>
void applyPairwiseLambda(const NDArray* other, const std::function<T(T, T)>& func, NDArray* target = nullptr);
void applyPairwiseLambda(const NDArray& other, const std::function<T(T, T)>& func, NDArray& target);
template <typename T>
void applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDArray* target = nullptr);
void applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDArray& target);
template <typename T>
void applyIndexedPairwiseLambda(NDArray* other, const std::function<T(Nd4jLong, T, T)>& func, NDArray* target = nullptr);
void applyIndexedPairwiseLambda(NDArray& other, const std::function<T(Nd4jLong, T, T)>& func, NDArray& target);
template <typename T>
void applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<T(T, T, T)>& func, NDArray* target = nullptr);
void applyTriplewiseLambda(NDArray& second, NDArray& third, const std::function<T(T, T, T)>& func, NDArray& target);
#endif
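
On the non-CUDA path the lambda helpers now take their target by reference rather than via a defaulted pointer. A minimal sketch (factory call assumed; the explicit <float> template argument is needed because T cannot be deduced from a lambda):

    #include <NDArrayFactory.h>

    using namespace nd4j;

    void lambdaSketch() {
        auto x = NDArrayFactory::create<float>('c', {3, 3});
        auto y = x.dup();   // dup now returns NDArray by value

        x.applyLambda<float>([](float v) { return v * v; }, y);                     // elementwise square into y
        x.applyPairwiseLambda<float>(y, [](float a, float b) { return a + b; }, y); // combine x and y into y
    }
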
/**
@ -780,7 +791,7 @@ namespace nd4j {
* dimensions - vector of dimensions to reduce along
* extraArgs - extra parameters for operation
*/
NDArray* applyIndexReduce(nd4j::indexreduce::Ops op, const std::vector<int>& dimensions, const ExtraArguments *extraParams = nullptr) const;
NDArray applyIndexReduce(nd4j::indexreduce::Ops op, const std::vector<int>& dimensions, const ExtraArguments *extraParams = nullptr) const;
/**
* reduces dimensions in array relying on index operation OpName
@ -788,14 +799,14 @@ namespace nd4j {
* dimensions - vector of dimensions to reduce along
* extraArgs - extra parameters for operation
*/
void applyIndexReduce(nd4j::indexreduce::Ops op, NDArray* target, const std::vector<int>& dimensions, const ExtraArguments *extraParams = nullptr) const;
void applyIndexReduce(nd4j::indexreduce::Ops op, NDArray& target, const std::vector<int>& dimensions, const ExtraArguments *extraParams = nullptr) const;
/**
* apply reduce3 operation OpName to this and other array, return result in new output array
* other - input array
* extraArgs - extra parameters for operation
*/
NDArray* applyReduce3(nd4j::reduce3::Ops op, const NDArray* other, const ExtraArguments* extraParams = nullptr) const;
NDArray applyReduce3(nd4j::reduce3::Ops op, const NDArray& other, const ExtraArguments* extraParams = nullptr) const;
/**
* apply reduce3 operation OpName to this and other array, return result in new output array
@ -803,7 +814,7 @@ namespace nd4j {
* dimensions - vector of dimensions to reduce along (tads not axis)
* extraArgs - extra parameters for operation
*/
NDArray* applyAllReduce3(nd4j::reduce3::Ops op, const NDArray* other, const std::vector<int>& dimensions, const ExtraArguments* extraParams = nullptr) const;
NDArray applyAllReduce3(nd4j::reduce3::Ops op, const NDArray& other, const std::vector<int>& dimensions, const ExtraArguments* extraParams = nullptr) const;
/**
* apply reduce3 (exec) operation OpName to this and other array, return result in new output array
@ -811,30 +822,26 @@ namespace nd4j {
* dimensions - vector of dimensions to reduce along (same as reduceAlongDimension)
* extraArgs - extra parameters for operation
*/
NDArray* applyReduce3(nd4j::reduce3::Ops op, const NDArray* other, const std::vector<int>& dimensions, const ExtraArguments* extraParams = nullptr) const;
NDArray applyReduce3(nd4j::reduce3::Ops op, const NDArray& other, const std::vector<int>& dimensions, const ExtraArguments* extraParams = nullptr) const;
/**
* returns variance along given dimensions
* biasCorrected - if true bias correction will be applied
* dimensions - vector of dimensions to calculate variance along
*/
NDArray* varianceAlongDimension(nd4j::variance::Ops op, const bool biasCorrected, const std::vector<int>& dimensions) const;
NDArray* varianceAlongDimension(nd4j::variance::Ops op, const bool biasCorrected, const std::initializer_list<int>& dimensions) const;
NDArray varianceAlongDimension(nd4j::variance::Ops op, const bool biasCorrected, const std::vector<int>& dimensions) const;
NDArray varianceAlongDimension(nd4j::variance::Ops op, const bool biasCorrected, const std::initializer_list<int>& dimensions) const;
NDArray varianceAlongDims(nd4j::variance::Ops op, const bool biasCorrected, const std::vector<int>& dimensions) const;
NDArray varianceAlongDims(nd4j::variance::Ops op, const bool biasCorrected, const std::initializer_list<int>& dimensions) const;
void varianceAlongDimension(nd4j::variance::Ops op, NDArray* target, const bool biasCorrected, const std::vector<int>& dimensions) const;
void varianceAlongDimension(nd4j::variance::Ops op, NDArray* target, const bool biasCorrected, const std::initializer_list<int>& dimensions) const;
void varianceAlongDimension(nd4j::variance::Ops op, NDArray& target, const bool biasCorrected, const std::vector<int>& dimensions) const;
void varianceAlongDimension(nd4j::variance::Ops op, NDArray& target, const bool biasCorrected, const std::initializer_list<int>& dimensions) const;
#endif
/**
* apply transpose operation to the copy of this array, that is this array remains unaffected
*/
NDArray transpose() const;
NDArray transpose() const &;
NDArray transpose() &&;
/**
* perform transpose operation and store result in target, this array remains unaffected
@ -852,8 +859,8 @@ namespace nd4j {
* index - the number of array to be returned among set of possible arrays
* dimensions - array of dimensions to point on
*/
NDArray* tensorAlongDimension(Nd4jLong index, const std::initializer_list<int>& dimensions) const;
NDArray* tensorAlongDimension(Nd4jLong index, const std::vector<int>& dimensions) const;
NDArray tensorAlongDimension(Nd4jLong index, const std::initializer_list<int>& dimensions) const;
NDArray tensorAlongDimension(Nd4jLong index, const std::vector<int>& dimensions) const;
/**
* returns the number of arrays pointing on specified dimension(s)
@ -874,54 +881,54 @@ namespace nd4j {
* add given row vector to all rows of this array
* row - row vector to add
*/
void addiRowVector(const NDArray *row);
void addiRowVector(const NDArray& row);
/**
* add given row vector to all rows of this array, store result in target
* row - row vector to add
* target - where to store result
*/
void addRowVector(const NDArray *row, NDArray* target) const;
void addRowVector(const NDArray& row, NDArray& target) const;
/**
* subtract given row vector from all rows of this array, store result in target
* row - row vector to subtract
* target - where to store result
*/
void subRowVector(const NDArray *row, NDArray* target) const;
void subRowVector(const NDArray& row, NDArray& target) const;
/**
* multiply all rows of this array on given row vector, store result in target
* row - row vector to multiply on
* target - where to store result
*/
void mulRowVector(const NDArray *row, NDArray* target) const;
void mulRowVector(const NDArray &row, NDArray& target) const;
/**
* divide all rows of this array on given row vector, store result in target
* row - row vector to divide on
* target - where to store result
*/
void divRowVector(const NDArray *row, NDArray* target) const;
void divRowVector(const NDArray &row, NDArray& target) const;
/**
* add given column vector to all columns of this array, store result in target
* column - column vector to add
* target - where to store result
*/
void addColumnVector(const NDArray *column, NDArray* target) const;
void addColumnVector(const NDArray &column, NDArray& target) const;
/**
* add given column vector to all columns of this array, this array becomes affected (in-place operation)
* column - column vector to add
*/
void addiColumnVector(const NDArray *column);
void addiColumnVector(const NDArray &column);
/**
* multiply all columns of this array on given column vector, this array becomes affected (in-place operation)
* column - column vector to multiply on
*/
void muliColumnVector(const NDArray *column);
void muliColumnVector(const NDArray &column);
/**
* returns number of bytes used by _buffer & _shapeInfo
@ -958,7 +965,8 @@ namespace nd4j {
*
* if permute have been applied before or there are weird strides, then new buffer is allocated for new array
*/
NDArray reshape(const char order, const std::vector<Nd4jLong>& shape) const;
NDArray reshape(const char order, const std::vector<Nd4jLong>& shape) const &;
NDArray reshape(const char order, const std::vector<Nd4jLong>& shape) &&;
/**
* calculate strides and set given order
@ -991,12 +999,6 @@ namespace nd4j {
*/
void tile(NDArray& target) const;
/**
* returns an array which is result of broadcasting of this and other arrays
* other - input array
*/
NDArray* broadcast(const NDArray& other);
/**
* check whether array is identity matrix
*/
@ -1007,7 +1009,6 @@ namespace nd4j {
*/
bool isUnitary();
/**
* operator returns subarray with buffer pointing at this->_buffer with offset defined by given intervals
* idx - intervals of indexes which define the subarrays to point on, idx has form {dim0Start,dim0End, dim1Start,dim1End, ....} and length (2 * this->rankOf())
@ -1038,27 +1039,6 @@ namespace nd4j {
*/
void getSubArrShapeAndOffsets(const std::vector<int>& dimsToExclude, Nd4jLong* &subArrShapeInfo, Nd4jLong* &subArrOffsets, bool keepUnitiesInShape = false) const;
/**
* addition operator: array + other
* other - input array to add
*/
NDArray operator+(const NDArray& other) const;
/**
* addition operator: array + scalar
* scalar - input scalar to add
*/
template <typename T>
NDArray operator+(const T& scalar) const;
/**
* friend functions which implement addition operator: scalar + array
* scalar - input scalar to add
*/
//template <typename T>
//friend NDArray nd4j::operator+(const T scalar, const NDArray& arr);
/**
* addition unary operator array += other
* other - input array to add
@ -1077,42 +1057,11 @@ namespace nd4j {
template <typename T>
void operator-=(const T other);
/**
* subtraction operator: array - other
* other - input array to subtract
*/
NDArray operator-(const NDArray& other) const;
/**
* subtraction operator: array - scalar
* scalar - input scalar to subtract
*/
template <typename T>
NDArray operator-(const T& scalar) const;
/**
* negative operator, it changes sign of all array elements on opposite
*/
NDArray operator-() const;
/**
* friend functions which implement subtraction operator: scalar - array
* scalar - input scalar to subtract
*/
//friend NDArray nd4j::operator-(const float scalar, const NDArray& arr);
/**
* pairwise multiplication operator: array * other
* other - input array to multiply on
*/
NDArray operator*(const NDArray& other) const;
/**
* multiplication operator: array * scalar
* scalar - input scalar to multiply on
*/
template <typename T>
NDArray operator*(const T& scalar) const;
NDArray operator-() const &;
NDArray operator-() &&;
/**
* pairwise multiplication unary operator array *= other
@ -1127,19 +1076,6 @@ namespace nd4j {
template <typename T>
void operator*=(const T scalar);
/**
* pairwise division operator: array / other
* other - input array to divide on
*/
NDArray operator/(const NDArray& other) const;
/**
* division operator: array / scalar
* scalar - input scalar to divide each array element on
*/
template <typename T>
NDArray operator/(const T& scalar) const;
/**
* pairwise division unary operator: array /= other
* other - input array to divide on
@ -1180,7 +1116,7 @@ namespace nd4j {
* return vector with buffer which points on corresponding diagonal elements of array
* type - means of vector to be returned: column ('c') or row ('r')
*/
NDArray* diagonal(const char type ) const;
NDArray diagonal(const char type ) const;
/**
* fill target matrix with given value in one or two directions from main diagonal:
@ -1194,7 +1130,7 @@ namespace nd4j {
* target and this array should have same shapes, except when this_rank = 1 (in that case should be target_rank = 2)
*/
template <typename T>
void fillAsTriangular(const float value, int lower, int upper, const char direction = 'b', NDArray* target = nullptr);
void fillAsTriangular(const float value, int lower, int upper, NDArray& target, const char direction = 'b');
/**
* change an array by repeating it the number of times in order to acquire new shape equal to the input shape
@ -1203,15 +1139,15 @@ namespace nd4j {
* target - optional argument, if target != nullptr the resulting array will be placed in target, in opposite case tile operation is done in place
*/
NDArray tileToShape(const Nd4jLong* shapeInfo);
void tileToShape(const std::vector<Nd4jLong>& shape, NDArray* target = nullptr);
void tileToShape(const std::vector<Nd4jLong>& shape, NDArray& target);
#ifndef __JAVACPP_HACK__
void tileToShape(const std::initializer_list<Nd4jLong>& shape, NDArray* target = nullptr);
void tileToShape(const std::initializer_list<Nd4jLong>& shape, NDArray& target);
#endif
template <typename N>
NDArray* asT() const;
NDArray asT() const;
NDArray* asT(DataType dtype) const;
NDArray asT(DataType dtype) const;
void linspace(const double start);
@ -1223,15 +1159,13 @@ namespace nd4j {
*/
double getTrace() const;
ResultSet* multipleTensorsAlongDimension(const std::vector<int>& indices, const std::vector<int>& dimensions) const;
ResultSet multipleTensorsAlongDimension(const std::vector<int>& indices, const std::vector<int>& dimensions) const;
ResultSet* allTensorsAlongDimension(const std::initializer_list<int>& dimensions) const;
ResultSet allTensorsAlongDimension(const std::initializer_list<int>& dimensions) const;
ResultSet* allTensorsAlongDimension(const std::vector<int>& dimensions) const;
ResultSet allTensorsAlongDimension(const std::vector<int>& dimensions) const;
//ResultSet allTensorsAlongDims(const std::vector<int>& dimensions) const;
ResultSet* allExamples()const ;
ResultSet allExamples()const ;
/**
* set _shapeInfo
@ -1356,7 +1290,7 @@ namespace nd4j {
/**
* returns true if these two NDArrays have same rank, dimensions, strides, ews and order
*/
FORCEINLINE bool isSameShapeStrict(const NDArray *other) const;
FORCEINLINE bool isSameShapeStrict(const NDArray& other) const;
/**
* returns true if buffer && shapeInfo were defined (non nullptr)
@ -1439,11 +1373,6 @@ namespace nd4j {
template <typename T>
void pIdx(const Nd4jLong* indices, const T value);
/**
* creates array which points on certain sub-range of this array, sub-range is defined by given indices
*/
NDArray* subarray(IndicesList& indices, std::vector<Nd4jLong>& strides) const;
/**
* returns true if array is 2D
*/
@ -1512,64 +1441,9 @@ namespace nd4j {
*/
bool isS() const;
/**
* inline accessing operator for matrix, i - absolute index
*/
//FORCEINLINE NDArray operator()(const Nd4jLong i) const;
/**
* inline modifying operator for matrix, i - absolute index
*/
//FORCEINLINE NDArray& operator()(const Nd4jLong i);
/**
* inline accessing operator for 2D array, i - row, j - column
*/
//FORCEINLINE NDArray operator()(const Nd4jLong i, const Nd4jLong j) const;
/**
* inline modifying operator for 2D array, i - row, j - column
*/
//FORCEINLINE NDArray& operator()(const Nd4jLong i, const Nd4jLong j);
/**
* inline accessing operator for 3D array, i - height, j - width, k - depth
*/
//FORCEINLINE NDArray operator()(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const;
/**
* inline modifying operator for 3D array, i - height, j - width, k - depth
*/
//FORCEINLINE NDArray& operator()(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k);
/**
* inline modifying operator for 4D array, i - height, j - width, k - depth
*/
//FORCEINLINE NDArray& operator()(const Nd4jLong t, const Nd4jLong u, const Nd4jLong v, const Nd4jLong w);
/**
* inline accessing operator for 4D array, i - height, j - width, k - depth
*/
//FORCEINLINE NDArray operator()(const Nd4jLong t, const Nd4jLong u, const Nd4jLong v, const Nd4jLong w) const;
/**
* inline modifying operator for ND array
* idx - array with corresponding indexes, for example {2,10,0,5,...,8}, number of indexes should be equal to array rank
*/
//FORCEINLINE NDArray& operator()(const Nd4jLong* idx);
/**
* inline accessing operator for ND array
* idx - array with corresponding indexes, for example {2,10,0,5,...,8}, number of indexes should be equal to array rank
*/
//FORCEINLINE NDArray operator()(const Nd4jLong* idx) const;
template <typename T>
std::vector<T> asVectorT();
FORCEINLINE bool isAttached();
NDArray* detach();
@ -1759,199 +1633,6 @@ namespace nd4j {
return 0 != shape::isScalar(this->_shapeInfo);
}
//////////////////////////////////////////////////////////////////////////
// accessing operator for matrix, i - absolute index
/*
NDArray NDArray::operator()(const Nd4jLong i) const {
if (i >= shape::length(_shapeInfo))
throw std::invalid_argument("NDArray::operator(i): input index is out of array length !");
auto ews = shape::elementWiseStride(_shapeInfo);
char order = ordering();
if(ews == 1 && order == 'c') {
auto cast = reinterpret_cast<int8_t *>(_buffer) + (i * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
} else if(ews > 1 && order == 'c') {
auto cast = reinterpret_cast<int8_t *>(_buffer) + (i * ews * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
} else {
Nd4jLong idx[MAX_RANK];
shape::ind2subC(rankOf(), shapeOf(), i, idx);
auto xOffset = shape::getOffset(getShapeInfo(), idx);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
}
*/
//////////////////////////////////////////////////////////////////////////
// modifying operator for matrix, i - absolute index
/*
NDArray& NDArray::operator()(const Nd4jLong i) {
if (i >= shape::length(_shapeInfo))
throw std::invalid_argument("NDArray::operator(i): input index is out of array length !");
auto ews = shape::elementWiseStride(_shapeInfo);
auto order = ordering();
if(ews == 1 && order == 'c') {
auto cast = reinterpret_cast<int8_t *>(_buffer) + (i * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
// FIXME: bad
return result;
} else if(ews > 1 && order == 'c') {
auto cast = reinterpret_cast<int8_t *>(_buffer) + (i * ews * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
} else {
Nd4jLong idx[MAX_RANK];
shape::ind2subC(rankOf(), shapeOf(), i, idx);
auto xOffset = shape::getOffset(getShapeInfo(), idx);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
}*/
//////////////////////////////////////////////////////////////////////////
// accessing operator for 2D matrix, i - row, j - column
/*
NDArray NDArray::operator()(const Nd4jLong i, const Nd4jLong j) const {
if (rankOf() != 2 || i >= shapeOf()[0] || j >= shapeOf()[1])
throw std::invalid_argument("NDArray::operator(i,j): one of input indexes is out of array length or rank!=2 !");
Nd4jLong coords[2] = {i, j};
auto xOffset = shape::getOffset(getShapeInfo(), coords);
// TODO: do we really want a view here?
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
*/
//////////////////////////////////////////////////////////////////////////
// modifying operator for 2D matrix, i - row, j - column
/*
NDArray& NDArray::operator()(const Nd4jLong i, const Nd4jLong j) {
if (rankOf() != 2 || i >= shapeOf()[0] || j >= shapeOf()[1])
throw std::invalid_argument("NDArray::operator(i,j): one of input indexes is out of array length or rank!=2 !");
Nd4jLong coords[2] = {i, j};
auto xOffset = shape::getOffset(getShapeInfo(), coords);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
//FIXME: bad, will crash!
return result;
}
*/
//////////////////////////////////////////////////////////////////////////
// accessing operator for 3D array, i - row, j - column
/*
NDArray NDArray::operator()(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const {
if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || j >= shapeOf()[2])
throw std::invalid_argument("NDArray::operator(i,j,k): one of input indexes is out of array length or rank!=3 !");
Nd4jLong coords[3] = {i, j, k};
auto xOffset = shape::getOffset(getShapeInfo(), coords);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
*/
//////////////////////////////////////////////////////////////////////////
// modifying operator for 3D array
/*
NDArray& NDArray::operator()(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) {
if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2])
throw std::invalid_argument("NDArray::operator(i,j,k): one of input indexes is out of array length or rank!=3 !");
Nd4jLong coords[3] = {i, j, k};
auto xOffset = shape::getOffset(getShapeInfo(), coords);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
//FIXME: bad, will crash!
return result;
}
*/
/*
NDArray NDArray::operator()(const Nd4jLong t, const Nd4jLong u, const Nd4jLong v, const Nd4jLong w) const {
if (rankOf() != 4 || t >= shapeOf()[0] || u >= shapeOf()[1] || v >= shapeOf()[2] || w >= shapeOf()[3])
throw std::invalid_argument("NDArray::operator(t,u,v,w): one of input indexes is out of array length or rank!=4 !");
Nd4jLong coords[4] = {t, u, v, w};
auto xOffset = shape::getOffset(getShapeInfo(), coords);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
*/
/*
NDArray& NDArray::operator()(const Nd4jLong t, const Nd4jLong u, const Nd4jLong v, const Nd4jLong w) {
if (rankOf() != 4 || t >= shapeOf()[0] || u >= shapeOf()[1] || v >= shapeOf()[2] || w >= shapeOf()[3])
throw std::invalid_argument("NDArray::operator(t,u,v,w): one of input indexes is out of array length or rank!=4 !");
Nd4jLong coords[4] = {t, u, v, w};
auto xOffset = shape::getOffset(getShapeInfo(), coords);
// FIXME
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
*/
//////////////////////////////////////////////////////////////////////////
/*
NDArray NDArray::operator()(const Nd4jLong* idx) const {
for(int i = 0; i < rankOf(); ++i)
if (idx[i] >= sizeAt(i))
throw std::invalid_argument("NDArray::operator(const Nd4jLong* idx): input index is out of dimension length !");
auto xOffset = shape::getOffset(getShapeInfo(), idx);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
return result;
}
*/
//////////////////////////////////////////////////////////////////////////
/*
NDArray& NDArray::operator()(const Nd4jLong* idx) {
for(int i = 0; i < rankOf(); ++i)
if (idx[i] >= sizeAt(i))
throw std::invalid_argument("NDArray::operator(const Nd4jLong* idx): input index is out of dimension length !");
auto xOffset = shape::getOffset(getShapeInfo(), idx);
auto cast = reinterpret_cast<int8_t *>(_buffer) + (xOffset * this->sizeOfT());
NDArray result(cast, nd4j::ShapeBuilders::createScalarShapeInfo(this->dataType(), this->getWorkspace()));
// FIXME
return result;
}
*/
//////////////////////////////////////////////////////////////////////////
Nd4jLong FORCEINLINE NDArray::memoryFootprint() {
@ -2009,8 +1690,8 @@ bool NDArray::areSameShapeAndType(const NDArray& other) const {
// returns true if these two NDArrays have same _shapeInfo
// still the definition of inline function must be in header file
bool NDArray::isSameShapeStrict(const NDArray *other) const {
return shape::equalsStrict(_shapeInfo, other->_shapeInfo);
bool NDArray::isSameShapeStrict(const NDArray& other) const {
return shape::equalsStrict(_shapeInfo, other._shapeInfo);
}
//////////////////////////////////////////////////////////////////////////

File diff suppressed because it is too large.


@ -133,7 +133,7 @@ namespace graph {
if (variableSpace->hasVariable(v->getName())) {
// symbolic feeder
auto array = variableSpace->getVariable(v->getName())->getNDArray();
auto vr = array->dup();
auto vr = new NDArray(array->dup());
// deletables.push_back(vr);
v->setNDArray(vr);
} else {
@ -145,7 +145,7 @@ namespace graph {
// if we're not using symbolic lookup - we'll use sequential approach then
auto p = node->input()->at(cnt);
auto array = variableSpace->getVariable(p)->getNDArray();
auto vr = array->dup();
auto vr = new NDArray(array->dup());
//deletables.push_back(vr);
v->setNDArray(vr);
}


@ -71,44 +71,41 @@ void NDArray::makeBothBuffersActual() const { }
////////////////////////////////////////////////////////////////////////
template <typename T>
void NDArray::fillAsTriangular(const float val, int lower, int upper, const char direction, NDArray* target) {
void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& target, const char direction) {
if (isS())
throw std::runtime_error("NDArray::fillArrayAsTriangular: you can't use this method on String array!");
if(target == nullptr)
target = this;
if(!isSameShape(target) && !(rankOf() == 1 && target->rankOf() == 2 && sizeAt(0) == target->sizeAt(0) && sizeAt(0) == target->sizeAt(1)))
if(!isSameShape(target) && !(rankOf() == 1 && target.rankOf() == 2 && sizeAt(0) == target.sizeAt(0) && sizeAt(0) == target.sizeAt(1)))
throw std::string("NDArray::fillArrayAsTriangular method: wrong shape of target array !");
if (direction == 'u')
lower = -target->sizeAt(-2);
lower = -target.sizeAt(-2);
else if (direction == 'l')
upper = target->sizeAt(-1);
upper = target.sizeAt(-1);
const T value = static_cast<T>(val);
const auto x = reinterpret_cast<const T*>(getBuffer());
auto z = reinterpret_cast<T*>(target->getBuffer());
auto z = reinterpret_cast<T*>(target.getBuffer());
const int xRank = rankOf();
const int zRank = target->rankOf();
const int zRank = target.rankOf();
const auto zLen = target->lengthOf();
const auto zLen = target.lengthOf();
const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target->getShapeInfo());
const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target.getShapeInfo());
auto func = PRAGMA_THREADS_FOR {
Nd4jLong coords[MAX_RANK];
for (auto i = start; i < stop; i += increment) {
shape::index2coords(i, target->getShapeInfo(), coords);
const auto zOffset = shape::getOffset(target->getShapeInfo(), coords);
shape::index2coords(i, target.getShapeInfo(), coords);
const auto zOffset = shape::getOffset(target.getShapeInfo(), coords);
// if( (row + upper < col) || (row + lower > col) )
if ((coords[zRank - 2] + upper < coords[zRank - 1]) || (coords[zRank - 2] + lower > coords[zRank - 1]))
z[zOffset] = value;
else if (this != target) { // when this and target are different arrays
else if (this != &target) { // when this and target are different arrays
if (xRank != zRank)
coords[0] = coords[1];
@ -120,7 +117,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, const char
samediff::Threads::parallel_for(func, 0, zLen);
}
BUILD_SINGLE_TEMPLATE(template void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES);
BUILD_SINGLE_TEMPLATE(template void NDArray::fillAsTriangular, (const float val, int lower, int upper, NDArray& target, const char direction), LIBND4J_TYPES);
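A minimal call sketch against the new signature, where the in-place case simply passes the array itself as the target (a float array is assumed; illustrative only):

auto m = NDArrayFactory::create<float>('c', {4, 4});
m.fillAsTriangular<float>(0.f, 0, 0, m, 'u');   // previously: m.fillAsTriangular<float>(0.f, 0, 0, 'u')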
////////////////////////////////////////////////////////////////////////
void NDArray::setIdentity() {
@ -405,11 +402,11 @@ static void repeat_(const NDArray& input, NDArray& output, const std::vector<int
//////////////////////////////////////////////////////////////////////////
// create new array by repeating it the number of times given by repeats
NDArray* NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
NDArray NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
auto output = new NDArray('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
NDArray output('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeat_, (*this, *output, repeats, axis), LIBND4J_TYPES);
BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeat_, (*this, output, repeats, axis), LIBND4J_TYPES);
return output;
}
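repeat() now hands the result back by value, so call sites no longer own a heap pointer. A short sketch, assuming the usual repeat semantics along axis 0:

auto x = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});
NDArray r = x.repeat(0, {2});   // each row repeated twice; previously: NDArray* r = x.repeat(0, {2}); ... delete r;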

View File

@ -2,35 +2,24 @@
template<typename T>
void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<T(T, T, T)>& func, NDArray* target) {
if (target == nullptr)
target = this;
void NDArray::applyTriplewiseLambda(NDArray& second, NDArray& third, const std::function<T(T, T, T)>& func, NDArray& target) {
if (second == nullptr) {
nd4j_printf("applyTriplewiseLambda requires three operands to be valid NDArrays, but Second is NULL\n","");
throw std::runtime_error("second is null");
}
if (third == nullptr) {
nd4j_printf("applyTriplewiseLambda requires three operands to be valid NDArrays, but Third is NULL\n","");
throw std::runtime_error("third is null");
}
if(dataType() != DataTypeUtils::fromT<T>())
throw std::runtime_error("NDArray::applyTriplewiseLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
if(dataType() != second->dataType() || dataType() != third->dataType() || dataType() != target->dataType())
if(dataType() != second.dataType() || dataType() != third.dataType() || dataType() != target.dataType())
throw std::runtime_error("NDArray::applyTriplewiseLambda<T> method: bother four arrays (this, second, third, target) should have the same type !");
if (this->lengthOf() != second->lengthOf() || this->lengthOf() != third->lengthOf() || !this->isSameShape(second) || !this->isSameShape(third)) {
if (this->lengthOf() != second.lengthOf() || this->lengthOf() != third.lengthOf() || !this->isSameShape(second) || !this->isSameShape(third)) {
nd4j_printf("applyPairwiseLambda requires both operands to have the same shape\n","");
throw std::runtime_error("Shapes mismach");
}
auto f = this->bufferAsT<T>();
auto s = second->bufferAsT<T>();
auto t = third->bufferAsT<T>();
auto z = target->bufferAsT<T>();
auto s = second.bufferAsT<T>();
auto t = third.bufferAsT<T>();
auto z = target.bufferAsT<T>();
if (this->ordering() == second->ordering() && this->ordering() == third->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == second->ews() && this->ews() == third->ews()) {
if (this->ordering() == second.ordering() && this->ordering() == third.ordering() && this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1) && this->ews() == second.ews() && this->ews() == third.ews()) {
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment)
@ -44,8 +33,8 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto tOffset = this->getOffset(e);
auto uOffset = second->getOffset(e);
auto vOffset = third->getOffset(e);
auto uOffset = second.getOffset(e);
auto vOffset = third.getOffset(e);
f[tOffset] = func(f[tOffset], s[uOffset], t[vOffset]);
}
@ -57,9 +46,9 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto tOffset = this->getOffset(e);
auto uOffset = second->getOffset(e);
auto vOffset = third->getOffset(e);
auto zOffset = target->getOffset(e);
auto uOffset = second.getOffset(e);
auto vOffset = third.getOffset(e);
auto zOffset = target.getOffset(e);
z[zOffset] = func(f[tOffset], s[uOffset], t[vOffset]);
}
@ -69,46 +58,39 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
}
}
}
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<double (double, double, double)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<float (float, float, float)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<float16 (float16, float16, float16)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<bfloat16 (bfloat16, bfloat16, bfloat16)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<int (int, int, int)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<int16_t (int16_t, int16_t, int16_t)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint8_t (uint8_t, uint8_t, uint8_t)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint16_t (uint16_t, uint16_t, uint16_t)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint32_t (uint32_t, uint32_t, uint32_t)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<uint64_t (uint64_t, uint64_t, uint64_t)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<int8_t (int8_t, int8_t, int8_t)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::function<bool (bool, bool, bool)>& func, NDArray* target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<double (double, double, double)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<float (float, float, float)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<float16 (float16, float16, float16)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<bfloat16 (bfloat16, bfloat16, bfloat16)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<int (int, int, int)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<int16_t (int16_t, int16_t, int16_t)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint8_t (uint8_t, uint8_t, uint8_t)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint16_t (uint16_t, uint16_t, uint16_t)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint32_t (uint32_t, uint32_t, uint32_t)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<uint64_t (uint64_t, uint64_t, uint64_t)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<int8_t (int8_t, int8_t, int8_t)>& func, NDArray& target);
template void NDArray::applyTriplewiseLambda(NDArray& second, NDArray &third, const std::function<bool (bool, bool, bool)>& func, NDArray& target);
//////////////////////////////////////////////////////////////////////////
template<typename T>
void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T, T)>& func, NDArray* target) {
if (target == nullptr)
target = this;
if (other == nullptr) {
nd4j_printf("applyPairwiseLambda requires both operands to be valid NDArrays, but Y is NULL\n","");
throw std::runtime_error("Other is null");
}
void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<T(T, T)>& func, NDArray& target) {
if(dataType() != DataTypeUtils::fromT<T>())
throw std::runtime_error("NDArray::applyPairwiseLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
if(dataType() != other->dataType() || dataType() != target->dataType())
if(dataType() != other.dataType() || dataType() != target.dataType())
throw std::runtime_error("NDArray::applyPairwiseLambda<T> method: all three arrays (this, other, target) must have the same type !");
if (this->lengthOf() != other->lengthOf()) {
if (this->lengthOf() != other.lengthOf()) {
nd4j_printf("applyPairwiseLambda requires both operands to have the same shape\n","");
throw std::runtime_error("Shapes mismach");
}
auto f = this->bufferAsT<T>();
auto s = other->bufferAsT<T>();
auto z = target->bufferAsT<T>();
auto s = other.bufferAsT<T>();
auto z = target.bufferAsT<T>();
if (this->ordering() == other->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == other->ews()) {
if (this->ordering() == other.ordering() && this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1) && this->ews() == other.ews()) {
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment)
@ -122,7 +104,7 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto xOffset = this->getOffset(e);
auto yOffset = other->getOffset(e);
auto yOffset = other.getOffset(e);
f[xOffset] = func(f[xOffset], s[yOffset]);
}
@ -134,8 +116,8 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto xOffset = this->getOffset(e);
auto yOffset = other->getOffset(e);
auto zOffset = target->getOffset(e);
auto yOffset = other.getOffset(e);
auto zOffset = target.getOffset(e);
z[zOffset] = func(f[xOffset], s[yOffset]);
}
@ -145,35 +127,33 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
}
}
}
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<double (double, double)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<float (float, float)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<float16 (float16, float16)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<bfloat16 (bfloat16, bfloat16)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<int (int, int)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<int16_t (int16_t, int16_t)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint8_t (uint8_t, uint8_t)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint16_t (uint16_t, uint16_t)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint32_t (uint32_t, uint32_t)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<uint64_t (uint64_t, uint64_t)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<int8_t (int8_t, int8_t)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<bool (bool, bool)>& func, NDArray* target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<double (double, double)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<float (float, float)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<float16 (float16, float16)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<bfloat16 (bfloat16, bfloat16)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<int (int, int)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<int16_t (int16_t, int16_t)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint8_t (uint8_t, uint8_t)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint16_t (uint16_t, uint16_t)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint32_t (uint32_t, uint32_t)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<uint64_t (uint64_t, uint64_t)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<int8_t (int8_t, int8_t)>& func, NDArray& target);
template void NDArray::applyPairwiseLambda(const NDArray& other, const std::function<bool (bool, bool)>& func, NDArray& target);
//////////////////////////////////////////////////////////////////////////
template<typename T>
void NDArray::applyLambda(const std::function<T(T)>& func, NDArray* target) {
if (target == nullptr)
target = this;
void NDArray::applyLambda(const std::function<T(T)>& func, NDArray& target) {
if(dataType() != DataTypeUtils::fromT<T>())
throw std::runtime_error("NDArray::applyLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
if(dataType() != target->dataType())
if(dataType() != target.dataType())
throw std::runtime_error("NDArray::applyLambda<T> method: types of this and target array should match !");
auto f = this->bufferAsT<T>();
auto z = target->bufferAsT<T>();
auto z = target.bufferAsT<T>();
if (this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1)) {
if (this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1)) {
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment)
@ -198,7 +178,7 @@ void NDArray::applyLambda(const std::function<T(T)>& func, NDArray* target) {
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto xOffset = this->getOffset(e);
auto zOffset = target->getOffset(e);
auto zOffset = target.getOffset(e);
z[zOffset] = func(f[xOffset]);
}
@ -208,35 +188,33 @@ void NDArray::applyLambda(const std::function<T(T)>& func, NDArray* target) {
}
}
}
template void NDArray::applyLambda(const std::function<double(double)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<float(float)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<float16(float16)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<bfloat16(bfloat16)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<Nd4jLong(Nd4jLong)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<int16_t(int16_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<int32_t(int32_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<uint8_t(uint8_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<uint16_t(uint16_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<uint32_t(uint32_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<uint64_t(uint64_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<int8_t(int8_t)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<bool(bool)>& func, NDArray* target);
template void NDArray::applyLambda(const std::function<double(double)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<float(float)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<float16(float16)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<bfloat16(bfloat16)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<Nd4jLong(Nd4jLong)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<int16_t(int16_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<int32_t(int32_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<uint8_t(uint8_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<uint16_t(uint16_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<uint32_t(uint32_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<uint64_t(uint64_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<int8_t(int8_t)>& func, NDArray& target);
template void NDArray::applyLambda(const std::function<bool(bool)>& func, NDArray& target);
//////////////////////////////////////////////////////////////////////////
template<typename T>
void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDArray* target) {
if (target == nullptr)
target = this;
void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDArray& target) {
if(dataType() != DataTypeUtils::fromT<T>())
throw std::runtime_error("NDArray::applyIndexedLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
if(dataType() != target->dataType())
if(dataType() != target.dataType())
throw std::runtime_error("NDArray::applyIndexedLambda<T> method: types of this and target array should match !");
auto f = this->bufferAsT<T>();
auto z = target->bufferAsT<T>();
auto z = target.bufferAsT<T>();
if (this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1)) {
if (this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1)) {
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment)
@ -261,7 +239,7 @@ void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDAr
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto xOffset = this->getOffset(e);
auto zOffset = target->getOffset(e);
auto zOffset = target.getOffset(e);
z[zOffset] = func(e, f[xOffset]);
}
@ -271,44 +249,38 @@ void NDArray::applyIndexedLambda(const std::function<T(Nd4jLong, T)>& func, NDAr
}
}
}
template void NDArray::applyIndexedLambda(const std::function<double(Nd4jLong, double)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<float(Nd4jLong, float)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<float16(Nd4jLong, float16)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<bfloat16(Nd4jLong, bfloat16)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<Nd4jLong(Nd4jLong, Nd4jLong)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<int(Nd4jLong, int)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<int16_t(Nd4jLong, int16_t)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<uint8_t (Nd4jLong, uint8_t)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<uint16_t (Nd4jLong, uint16_t)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<uint32_t (Nd4jLong, uint32_t)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<uint64_t (Nd4jLong, uint64_t)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<int8_t(Nd4jLong, int8_t)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<bool(Nd4jLong, bool)>& func, NDArray* target);
template void NDArray::applyIndexedLambda(const std::function<double(Nd4jLong, double)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<float(Nd4jLong, float)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<float16(Nd4jLong, float16)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<bfloat16(Nd4jLong, bfloat16)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<Nd4jLong(Nd4jLong, Nd4jLong)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<int(Nd4jLong, int)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<int16_t(Nd4jLong, int16_t)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<uint8_t (Nd4jLong, uint8_t)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<uint16_t (Nd4jLong, uint16_t)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<uint32_t (Nd4jLong, uint32_t)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<uint64_t (Nd4jLong, uint64_t)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<int8_t(Nd4jLong, int8_t)>& func, NDArray& target);
template void NDArray::applyIndexedLambda(const std::function<bool(Nd4jLong, bool)>& func, NDArray& target);
//////////////////////////////////////////////////////////////////////////
template<typename T>
void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(Nd4jLong, T, T)>& func, NDArray* target) {
if (target == nullptr)
target = this;
void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<T(Nd4jLong, T, T)>& func, NDArray& target) {
if (other == nullptr) {
nd4j_printf("applyIndexedPairwiseLambda requires both operands to be valid NDArrays, but Y is NULL\n","");
throw std::runtime_error("Other is null");
}
if(dataType() != DataTypeUtils::fromT<T>())
throw std::runtime_error("NDArray::applyIndexedPairwiseLambda<T> method: wrong template parameter T, its type should be the same as type of this array!");
if(dataType() != target->dataType())
if(dataType() != target.dataType())
throw std::runtime_error("NDArray::applyIndexedPairwiseLambda<T> method: types of this and target array should match !");
if (this->lengthOf() != other->lengthOf()) {
if (this->lengthOf() != other.lengthOf()) {
nd4j_printf("applyIndexedPairwiseLambda requires both operands to have the same shape\n","");
throw std::runtime_error("Shapes mismach");
}
auto f = this->bufferAsT<T>();
auto s = other->bufferAsT<T>();
auto z = target->bufferAsT<T>();
auto s = other.bufferAsT<T>();
auto z = target.bufferAsT<T>();
if (this->ordering() == other->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == other->ews()) {
if (this->ordering() == other.ordering() && this->ordering() == target.ordering() && (this->ews() == 1 && target.ews() == 1) && this->ews() == other.ews()) {
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment)
@ -322,7 +294,7 @@ void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(N
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto xOffset = this->getOffset(e);
auto yOffset = other->getOffset(e);
auto yOffset = other.getOffset(e);
f[xOffset] = func((Nd4jLong) e, f[xOffset], s[yOffset]);
}
@ -334,8 +306,8 @@ void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(N
auto loop = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
auto xOffset = this->getOffset(e);
auto yOffset = other->getOffset(e);
auto zOffset = target->getOffset(e);
auto yOffset = other.getOffset(e);
auto zOffset = target.getOffset(e);
z[zOffset] = func((Nd4jLong) e, f[xOffset], s[yOffset]);
}
@ -345,16 +317,16 @@ void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<T(N
}
}
}
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<double (Nd4jLong, double, double)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<float (Nd4jLong, float, float)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<float16 (Nd4jLong, float16, float16)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<bfloat16 (Nd4jLong, bfloat16, bfloat16)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<int (Nd4jLong, int, int)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<int16_t (Nd4jLong, int16_t, int16_t)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint8_t (Nd4jLong, uint8_t, uint8_t)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint16_t (Nd4jLong, uint16_t, uint16_t)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint32_t (Nd4jLong, uint32_t, uint32_t)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<uint64_t (Nd4jLong, uint64_t, uint64_t)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<int8_t (Nd4jLong, int8_t, int8_t)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray* other, const std::function<bool (Nd4jLong, bool, bool)>& func, NDArray* target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<double (Nd4jLong, double, double)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<float (Nd4jLong, float, float)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<float16 (Nd4jLong, float16, float16)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<bfloat16 (Nd4jLong, bfloat16, bfloat16)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<Nd4jLong (Nd4jLong, Nd4jLong, Nd4jLong)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<int (Nd4jLong, int, int)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<int16_t (Nd4jLong, int16_t, int16_t)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint8_t (Nd4jLong, uint8_t, uint8_t)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint16_t (Nd4jLong, uint16_t, uint16_t)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint32_t (Nd4jLong, uint32_t, uint32_t)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<uint64_t (Nd4jLong, uint64_t, uint64_t)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<int8_t (Nd4jLong, int8_t, int8_t)>& func, NDArray& target);
template void NDArray::applyIndexedPairwiseLambda(NDArray& other, const std::function<bool (Nd4jLong, bool, bool)>& func, NDArray& target);

View File

@ -2717,25 +2717,25 @@ static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSub
switch (opCode) {
case 0:
inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::Add, updSubArr, inSubArr);
break;
case 1:
inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::Subtract, updSubArr, inSubArr);
break;
case 2:
inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::Multiply, updSubArr, inSubArr);
break;
case 3:
inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::Divide, updSubArr, inSubArr);
break;
case 4:
inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, updSubArr, inSubArr);
break;
case 5:
inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, updSubArr, inSubArr);
break;
case 6:
inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr);
inSubArr.applyPairwiseTransform(pairwise::CopyPws, updSubArr, inSubArr);
break;
default:
continue;
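All branches now use the three-argument applyPairwiseTransform(op, other, target) form, with the target passed by reference; passing the sub-array itself keeps the update in place. A hedged sketch outside the scatter loop (names illustrative):

auto in  = NDArrayFactory::create<float>('c', {3}, {1.f, 2.f, 3.f});
auto upd = NDArrayFactory::create<float>('c', {3}, {10.f, 10.f, 10.f});
in.applyPairwiseTransform(pairwise::Add, upd, in);   // in += upd, elementwise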

View File

@ -122,35 +122,32 @@ __global__ static void fillAsTriangularCuda(const void* vx, const Nd4jLong* xSha
///////////////////////////////////////////////////////////////////
template<typename T>
void NDArray::fillAsTriangular(const float val, int lower, int upper, const char direction, NDArray* target) {
void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& target, const char direction) {
if (isS())
throw std::runtime_error("NDArray::fillAsTriangular: you can't use this method on String array!");
if(target == nullptr)
target = this;
if(!isSameShape(target) && !(rankOf() == 1 && target->rankOf() == 2 && sizeAt(0) == target->sizeAt(0) && sizeAt(0) == target->sizeAt(1)))
if(!isSameShape(target) && !(rankOf() == 1 && target.rankOf() == 2 && sizeAt(0) == target.sizeAt(0) && sizeAt(0) == target.sizeAt(1)))
throw std::string("NDArray::fillAsTriangular method: wrong shape of target array !");
if (direction == 'u')
lower = -target->sizeAt(-2);
lower = -target.sizeAt(-2);
else if (direction == 'l')
upper = target->sizeAt(-1);
upper = target.sizeAt(-1);
const int threadsPerBlock = MAX_NUM_THREADS / 4;
const int blocksPerGrid = (target->lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
const int sharedMem = threadsPerBlock * sizeof(decltype(*target->getShapeInfo())) * target->rankOf() + 128;
const int blocksPerGrid = (target.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
const int sharedMem = threadsPerBlock * sizeof(decltype(*target.getShapeInfo())) * target.rankOf() + 128;
PointersManager manager(getContext(), "NDArray::fillAsTriangular");
NDArray::prepareSpecialUse({target}, {this});
fillAsTriangularCuda<T><<<blocksPerGrid, threadsPerBlock, sharedMem, *getContext()->getCudaStream()>>>(getPlatformBuffer(), getPlatformShapeInfo(), target->getPlatformBuffer(), target->getPlatformShapeInfo(), static_cast<T>(val), lower, upper);
NDArray::registerSpecialUse({target}, {this});
NDArray::prepareSpecialUse({&target}, {this});
fillAsTriangularCuda<T><<<blocksPerGrid, threadsPerBlock, sharedMem, *getContext()->getCudaStream()>>>(getPlatformBuffer(), getPlatformShapeInfo(), target.getPlatformBuffer(), target.getPlatformShapeInfo(), static_cast<T>(val), lower, upper);
NDArray::registerSpecialUse({&target}, {this});
manager.synchronize();
}
BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES);
BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void NDArray::fillAsTriangular, (const float val, int lower, int upper, NDArray& target, const char direction), LIBND4J_TYPES);
////////////////////////////////////////////////////////////////////////
template<typename T>
@ -457,21 +454,21 @@ BUILD_DOUBLE_TEMPLATE(template void repeatCudaLauncher, (const int blocksPerGrid
//////////////////////////////////////////////////////////////////////////
// create new array by repeating it the number of times given by repeats
NDArray* NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
NDArray NDArray::repeat(const int axis, const std::vector<int>& repeats) const {
auto output = new NDArray('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
NDArray output('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
const int threadsPerBlock = MAX_NUM_THREADS / 2;
const int blocksPerGrid = (output->lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
const int sharedMem = output->rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128;
const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock;
const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128;
PointersManager manager(getContext(), "NDArray::repeat(const int axis, const std::vector<int>& repeats)");
const int* reps = reinterpret_cast<int*>(manager.replicatePointer(repeats.data(), repeats.size() * sizeof(int)));
prepareSpecialUse({output}, {this});
BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES);
prepareSpecialUse({output}, {this});
prepareSpecialUse({&output}, {this});
BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES);
prepareSpecialUse({&output}, {this});
manager.synchronize();

View File

@ -247,73 +247,73 @@ static _CUDA_G void lambdaTriplewiseKernel(void* vw, Nd4jLong *wShapeInfo, void*
//////////////////////////////////////////////////////////////////////////
template<typename Lambda>
void NDArray::applyLambda(Lambda func, NDArray* target) {
auto result = target == nullptr ? this : target;
void NDArray::applyLambda(Lambda func, NDArray& target) {
auto dtype = this->dataType();
if (dtype != result->dataType())
if (dtype != target.dataType())
throw std::runtime_error("NDArray::applyLambda X/Z data types must be the same");
//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, result->dataType());
prepareSpecialUse({result}, {this});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({result}, {this});
//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, target.dataType());
prepareSpecialUse({&target}, {this});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({&target}, {this});
}
//////////////////////////////////////////////////////////////////////////
template<typename Lambda>
void NDArray::applyPairwiseLambda(const NDArray* other, Lambda func, NDArray* target) {
auto result = target == nullptr ? this : target;
void NDArray::applyPairwiseLambda(const NDArray& other, Lambda func, NDArray& target) {
auto dtype = this->dataType();
if (dtype != result->dataType() || dtype != other->dataType())
if (dtype != target.dataType() || dtype != other.dataType())
throw std::runtime_error("NDArray::applyPairwiseLambda X/Y/Z data types must be the same");
//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, result->dataType());
//throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, target.dataType());
prepareSpecialUse({result}, {this, other});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other->getSpecialBuffer(), other->getSpecialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({result}, {this, other});
prepareSpecialUse({&target}, {this, &other});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({&target}, {this, &other});
}
//////////////////////////////////////////////////////////////////////////
template <typename Lambda>
void NDArray::applyIndexedLambda(Lambda func, NDArray* target) {
auto result = target == nullptr ? this : target;
void NDArray::applyIndexedLambda(Lambda func, NDArray& target) {
auto dtype = this->dataType();
if (dtype != result->dataType())
if (dtype != target.dataType())
throw std::runtime_error("NDArray::applyIndexedLambda X/Z data types must be the same");
prepareSpecialUse({result}, {this});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({result}, {this});
prepareSpecialUse({&target}, {this});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({&target}, {this});
}
//////////////////////////////////////////////////////////////////////////
template <typename Lambda>
void NDArray::applyIndexedPairwiseLambda(NDArray* other, Lambda func, NDArray* target) {
auto result = target == nullptr ? this : target;
void NDArray::applyIndexedPairwiseLambda(NDArray& other, Lambda func, NDArray& target) {
auto dtype = this->dataType();
if (dtype != result->dataType() || dtype != other->dataType())
if (dtype != target.dataType() || dtype != other.dataType())
throw std::runtime_error("NDArray::applyIndexedPairwiseLambda X/Y/Z data types must be the same");
prepareSpecialUse({result}, {this, other});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other->getSpecialBuffer(), other->getSpecialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({result}, {this, other});
prepareSpecialUse({&target}, {this, &other});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({&target}, {this, &other});
}
//////////////////////////////////////////////////////////////////////////
template <typename Lambda>
void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, Lambda func, NDArray* target) {
auto result = target == nullptr ? this : target;
void NDArray::applyTriplewiseLambda(NDArray& second, NDArray& third, Lambda func, NDArray& target) {
auto dtype = this->dataType();
if (dtype != result->dataType() || dtype != second->dataType() || dtype != third->dataType())
if (dtype != target.dataType() || dtype != second.dataType() || dtype != third.dataType())
throw std::runtime_error("NDArray::applyTriplewiseLambda X/Y/Z data types must be the same");
prepareSpecialUse({result}, {this, second, third});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaTriplewiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), second->specialBuffer(), second->specialShapeInfo(), third->specialBuffer(), third->specialShapeInfo(), result->specialBuffer(), result->specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({result}, {this, second, third});
prepareSpecialUse({&target}, {this, &second, &third});
BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaTriplewiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), second.specialBuffer(), second.specialShapeInfo(), third.specialBuffer(), third.specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES);
registerSpecialUse({&target}, {this, &second, &third});
}

View File

@ -91,6 +91,10 @@ namespace nd4j {
template <typename T>
FORCEINLINE static bool castShapeInfo(const Nd4jLong *originalShapeInfo, T *newShapeInfo);
template<typename T>
// struct scalarTypesForNDarray { static bool const value = std::is_same<double, T>::value || std::is_same<float, T>::value || std::is_same<int, T>::value || std::is_same<bfloat16, T>::value || std::is_same<float16, T>::value || std::is_same<long long, T>::value; };
struct scalarTypesForNDarray { static bool const value = std::is_same<double, T>::value || std::is_same<float, T>::value || std::is_same<int, T>::value || std::is_same<unsigned int, T>::value || std::is_same<long long, T>::value || std::is_same<unsigned long long, T>::value || std::is_same<long int, T>::value || std::is_same<long unsigned int, T>::value || std::is_same<int8_t, T>::value || std::is_same<uint8_t, T>::value || std::is_same<int16_t, T>::value || std::is_same<uint16_t, T>::value || std::is_same<bool, T>::value || std::is_same<bfloat16, T>::value || std::is_same<float16, T>::value; };
};
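This trait is presumably used to constrain the scalar overloads of the NDArray arithmetic operators so they participate in overload resolution only for the listed scalar types; an illustrative enable_if sketch, not the exact declaration:

template <typename T, typename = typename std::enable_if<DataTypeUtils::scalarTypesForNDarray<T>::value>::type>
NDArray operator+(const NDArray& arr, const T scalar);   // enabled only for supported scalar types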

View File

@ -44,7 +44,7 @@ namespace nd4j {
}
NDArray* NDArrayList::read(int idx) {
return readRaw(idx)->dup();
return new NDArray(readRaw(idx)->dup());
}
nd4j::DataType NDArrayList::dataType() {
@ -136,11 +136,10 @@ namespace nd4j {
std::vector<int> args({axis});
auto newAxis = ShapeUtils::evalDimsToExclude(array->rankOf(), args);
auto result = array->allTensorsAlongDimension(newAxis);
for (int e = 0; e < result->size(); e++) {
auto chunk = result->at(e);//->dup(array->ordering());
write(e, chunk->dup(array->ordering()));
for (int e = 0; e < result.size(); e++) {
auto chunk = result.at(e);//->dup(array->ordering());
write(e, new NDArray(chunk->dup(array->ordering())));
}
delete result;
}
NDArray* NDArrayList::stack() {
@ -161,7 +160,7 @@ namespace nd4j {
auto result = op.execute(inputs, {}, {}, {});
auto array = result->at(0)->dup();
auto array = new NDArray(result->at(0)->dup());
delete result;
@ -214,13 +213,11 @@ namespace nd4j {
auto tads = array->allTensorsAlongDimension(axis);
int indicesSize = indices.size();
if (tads->size() != indicesSize)
if (tads.size() != indicesSize)
throw std::runtime_error("Number of TADs should match number of indices");
for (int e = 0; e < indicesSize; e++)
tads->at(e)->assign(_chunks[indices[e]]);
delete tads;
tads.at(e)->assign(_chunks[indices[e]]);
return array;
}
@ -234,7 +231,7 @@ namespace nd4j {
list->_elements.store(_elements.load());
for (auto const& v : _chunks) {
list->_chunks[v.first] = v.second->dup();
list->_chunks[v.first] = new NDArray(v.second->dup());
}
return list;

View File

@ -48,7 +48,7 @@ namespace nd4j {
} else {
// FIXME: in some cases it's possible to have no NDArray
if (inputVar->hasNDArray())
innerVar->setNDArray(inputVar->getNDArray()->dup());
innerVar->setNDArray(new NDArray(inputVar->getNDArray()->dup()));
}
}

View File

@ -56,7 +56,7 @@ namespace nd4j {
} else {
// FIXME: in some cases it's possible to have no NDArray
if (inputVar->hasNDArray())
innerVar->setNDArray(inputVar->getNDArray()->dup());
innerVar->setNDArray(new NDArray(inputVar->getNDArray()->dup()));
}
}

View File

@ -40,7 +40,7 @@ namespace nd4j {
result->setIndex(this->_index);
if (this->_ndarray != nullptr)
result->setNDArray(this->_ndarray->template asT<N>());
result->setNDArray(new NDArray(this->_ndarray->template asT<N>()));
// FIXME: add support for ArrayList
if (this->_list != nullptr) {
@ -61,7 +61,7 @@ namespace nd4j {
result->_index = this->_index;
if (this->_ndarray != nullptr)
result->_ndarray = this->_ndarray->dup(this->_ndarray->ordering());
result->_ndarray = new NDArray(this->_ndarray->dup(this->_ndarray->ordering()));
if (this->_list != nullptr)
result->_list = this->_list->clone();

View File

@ -93,7 +93,7 @@ namespace nd4j {
}
OpBenchmark* clone() override {
return new ScalarBenchmark((scalar::Ops) _opNum, _testName, _x == nullptr ? _x : _x->dup() , _y == nullptr ? _y : _y->dup(), _z == nullptr ? _z : _z->dup());
return new ScalarBenchmark((scalar::Ops) _opNum, _testName, _x == nullptr ? _x : new NDArray(_x->dup()) , _y == nullptr ? _y : new NDArray(_y->dup()), _z == nullptr ? _z : new NDArray(_z->dup()));
}
};
}

View File

@ -230,17 +230,17 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, con
bool cNcont = N == 1 || C->strideAt(1) == 1;
if(!aMcont && !aKcont) {
pA = A->dup('f');
pA = new NDArray(A->dup('f'));
toDelete.push_back(pA);
aMcont = true;
}
if(!bKcont && !bNcont) {
pB = B->dup('f');
pB = new NDArray(B->dup('f'));
toDelete.push_back(pB);
bKcont = true;
}
if(!cMcont && !cNcont) {
pC = C->dup('f');
pC = new NDArray(C->dup('f'));
toDelete.push_back(pC);
cMcont = true;
}
@ -332,7 +332,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, nd4j::NDArray*
bool aNcont = N == 1 || A->strideAt(1) == 1;
if(!aMcont && !aNcont) {
pA = A->dup('f');
pA = new NDArray(A->dup('f'));
aMcont = true;
}
const CBLAS_ORDER blasOrder = aMcont ? CblasColMajor : CblasRowMajor;

View File

@ -60,11 +60,10 @@ NDArray Householder<T>::evalHHmatrix(const NDArray& x) {
w.p(Nd4jLong(0), 1.f);
wT.assign(&w);
auto identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext());
NDArray identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext());
identity.setIdentity(); // identity matrix
return identity - mmul(w, wT) * coeff;
}
//////////////////////////////////////////////////////////////////////////
@ -95,9 +94,9 @@ void Householder<T>::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff,
coeff = -u0 / normX;
if(x.isRowVector())
tail.assign(x({0,0, 1,-1}) / u0);
tail.assign(static_cast<const NDArray&>(x({0,0, 1,-1})) / u0);
else
tail.assign(x({1,-1, 0,0,}) / u0);
tail.assign(static_cast<const NDArray&>(x({1,-1, 0,0,})) / u0);
}
}

View File

@ -269,7 +269,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
HHcolPivQR qr(matrix / scale);
_m.assign(qr._qr({0,_cols, 0,_cols}));
_m.fillAsTriangular<T>(0., 0, 0, 'l');
_m.fillAsTriangular<T>(0., 0, 0, _m, 'l');
HHsequence hhSeg(qr._qr, qr._coeffs, 'u');
@ -288,7 +288,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
auto matrixT = matrix.transpose();
HHcolPivQR qr(matrixT / scale);
_m.assign(qr._qr({0,_rows, 0,_rows}));
_m.fillAsTriangular<T>(0., 0, 0, 'l');
_m.fillAsTriangular<T>(0., 0, 0, _m, 'l');
_m.transposei();
HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here !
@ -305,7 +305,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
}
else {
_m.assign(matrix({0,_diagSize, 0,_diagSize}) / scale);
_m.assign(static_cast<const NDArray&>(matrix({0,_diagSize, 0,_diagSize})) / scale);
if(_calcU)
_u.setIdentity();
@ -366,7 +366,7 @@ void JacobiSVD<T>::evalData(const NDArray& matrix) {
_s.p(i, math::nd4j_abs<T>(_m.e<T>(i,i)));
if(_calcU && _m.e<T>(i,i) < (T)0.) {
auto temp = _u({0,0, i,i+1}, true);
temp.applyTransform(transform::Neg, &temp, nullptr);
temp.applyTransform(transform::Neg, temp, nullptr);
}
}

View File

@ -223,26 +223,26 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
const T almostZero = DataTypeUtils::min<T>();
T maxElem;
if(len == 1)
maxElem = math::nd4j_abs<T>(diagInterval->template e<T>(0));
maxElem = math::nd4j_abs<T>(diagInterval.template e<T>(0));
else
maxElem = (*diagInterval)({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e<T>(0);
maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e<T>(0);
T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e<T>(0);
T eps = math::nd4j_max<T>(almostZero, DataTypeUtils::eps<T>() * maxElem);
T epsBig = (T)8. * DataTypeUtils::eps<T>() * math::nd4j_max<T>(maxElem0, maxElem);
if(diagInterval->template e<T>(0) < epsBig)
diagInterval->p(Nd4jLong(0), epsBig);
if(diagInterval.template e<T>(0) < epsBig)
diagInterval.p(Nd4jLong(0), epsBig);
for(int i=1; i < len; ++i)
if(math::nd4j_abs<T>(colVec0->template e<T>(i)) < eps)
colVec0->p(i, 0.f);
for(int i=1; i < len; i++)
if(diagInterval->template e<T>(i) < epsBig) {
if(diagInterval.template e<T>(i) < epsBig) {
deflation1(col1, shift, i, len);
for(int i = 0; i < len; ++i)
diagInterval->p(i, _m.e<T>(col1+shift+i,col1+shift+i));
diagInterval.p(i, _m.e<T>(col1+shift+i,col1+shift+i));
}
{
@ -261,7 +261,7 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
int p = 1;
for(int i=1; i<len; ++i)
if(math::nd4j_abs<T>(diagInterval->template e<T>(i)) < almostZero)
if(math::nd4j_abs<T>(diagInterval.template e<T>(i)) < almostZero)
permut[p++] = i;
int k = 1, m = ind+1;
@ -271,7 +271,7 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
permut[p] = m++;
else if(m >= len)
permut[p] = k++;
else if(diagInterval->template e<T>(k) < diagInterval->template e<T>(m))
else if(diagInterval.template e<T>(k) < diagInterval.template e<T>(m))
permut[p] = m++;
else
permut[p] = k++;
@ -281,7 +281,7 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
if(totDefl) {
for(int i=1; i<len; ++i) {
int ki = permut[i];
if(math::nd4j_abs<T>(diagInterval->template e<T>(ki)) < almostZero || diagInterval->template e<T>(0) < diagInterval->template e<T>(ki))
if(math::nd4j_abs<T>(diagInterval.template e<T>(ki)) < almostZero || diagInterval.template e<T>(0) < diagInterval.template e<T>(ki))
permut[i-1] = permut[i];
else {
permut[i-1] = 0;
@ -303,10 +303,10 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
const int ki = permut[len - (totDefl ? i+1 : i)];
const int jac = tCol[ki];
T _e0 = diagInterval->template e<T>(jac);
T _e0 = diagInterval.template e<T>(jac);
//math::nd4j_swap<T>(diagInterval)(i), (*diagInterval)(jac));
diagInterval->p(jac, diagInterval->template e<T>(i));
diagInterval->p(i, _e0);
diagInterval.p(jac, diagInterval.template e<T>(i));
diagInterval.p(i, _e0);
if(i!=0 && jac!=0) {
_e0 = colVec0->template e<T>(jac);
@ -315,7 +315,6 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
colVec0->p(i, _e0);
}
NDArray* temp1 = nullptr, *temp2 = nullptr;
if (_calcU) {
auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true);
auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true);
@ -352,12 +351,12 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
{
int i = len-1;
while(i > 0 && (math::nd4j_abs<T>(diagInterval->template e<T>(i)) < almostZero || math::nd4j_abs<T>(colVec0->template e<T>(i)) < almostZero))
while(i > 0 && (math::nd4j_abs<T>(diagInterval.template e<T>(i)) < almostZero || math::nd4j_abs<T>(colVec0->template e<T>(i)) < almostZero))
--i;
for(; i > 1; --i) {
if( (diagInterval->template e<T>(i) - diagInterval->template e<T>(i-1)) < DataTypeUtils::eps<T>()*maxElem ) {
if (math::nd4j_abs<T>(diagInterval->template e<T>(i) - diagInterval->template e<T>(i-1)) >= epsBig)
if( (diagInterval.template e<T>(i) - diagInterval.template e<T>(i-1)) < DataTypeUtils::eps<T>()*maxElem ) {
if (math::nd4j_abs<T>(diagInterval.template e<T>(i) - diagInterval.template e<T>(i-1)) >= epsBig)
throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !");
deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len);
}
@ -365,7 +364,6 @@ void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh
}
delete colVec0;
delete diagInterval;
}
@ -609,9 +607,7 @@ void SVD<T>::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA
const T almostZero = DataTypeUtils::min<T>();
auto col0 = _m({col1, col1+size, col1, col1+1}, true);
auto diagP = _m({col1, col1+size, col1, col1+size}, true).diagonal('c');
auto diag = *diagP;
delete diagP;
auto diag = static_cast<const NDArray&>(_m({col1, col1+size, col1, col1+size}, true).diagonal('c'));
diag.p(Nd4jLong(0), T(0));
singVals = NDArrayFactory::create<T>(_m.ordering(), {size, 1}, _m.getContext());
@ -730,8 +726,7 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true);
temp.assign(0.);
auto diag = _m.diagonal('c');
(*diag)({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true));
delete diag;
diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true));
return;
}
@ -762,11 +757,6 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
f.assign(_u({0,1, col1+k+1, col1+n}, true));
}
// UofSVD.printIndexedBuffer();
// VofSVD.printIndexedBuffer();
// singVals.printIndexedBuffer();
// printf("!! \n");
if (_calcV)
_v.p(row1W+k, col1W, 1.f);
@ -789,14 +779,10 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
temp.assign(_u({col1, col1+k+1, i, i+1}, true));
}
auto temp1 = _u({col1,col1+k+1, col1,col1+1}, true);
temp1.assign(q1 * c0);
auto temp2 = _u({col1,col1+k+1, col2+1,col2+2}, true);
temp2.assign(q1 * (-s0));
auto temp3 = _u({col1+k+1,col1+n+1, col1, col1+1}, true);
temp3.assign(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true) * s0);
auto temp4 =_u({col1+k+1,col1+n+1, col2+1,col2+2}, true);
temp4 *= c0;
_u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0);
_u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0));
_u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast<const NDArray&>(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0);
_u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0;
}
else {
@ -844,8 +830,7 @@ void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif
auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true);
blockM = 0.f;
auto diag = blockM.diagonal('c');
diag->assign(singVals);
delete diag;
diag.assign(singVals);
}
//////////////////////////////////////////////////////////////////////////
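For reference, a minimal sketch (not part of the diff) of the view pattern this file now uses: `diagonal('c')` returns an `NDArray` view by value, so the caller assigns through it directly instead of managing a heap pointer. `blockM`/`singVals` mirror the locals above; libnd4j headers and the `nd4j` namespace are assumed.

```cpp
// Before: NDArray* diag = blockM.diagonal('c'); diag->assign(singVals); delete diag;
// After:  the view is a value and writes through to blockM's diagonal.
void assignDiagonal(nd4j::NDArray& blockM, const nd4j::NDArray& singVals) {
    auto diag = blockM.diagonal('c');   // NDArray view, value semantics
    diag.assign(singVals);              // in-place write, no manual delete
}
```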

View File

@ -285,17 +285,17 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou
bool cNcont = N == 1 || C->strideAt(1) == 1;
if(!aMcont && !aKcont) {
pA = A->dup('f');
pA = new NDArray(A->dup('f'));
toDelete.push_back(pA);
aMcont = true;
}
if(!bKcont && !bNcont) {
pB = B->dup('f');
pB = new NDArray(B->dup('f'));
toDelete.push_back(pB);
bKcont = true;
}
if(!cMcont) {
pC = C->dup('f');
pC = new NDArray(C->dup('f'));
toDelete.push_back(pC);
cMcont = true;
}
@ -418,7 +418,7 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, nd4j::NDArray*
bool aNcont = N == 1 || A->strideAt(1) == 1;
if(!aMcont && !aNcont) {
pA = A->dup('f');
pA = new NDArray(A->dup('f'));
aMcont = true;
}
@ -866,12 +866,12 @@ NDArray* MmulHelper::mmulNxNold2(const NDArray* A, const NDArray* B, NDArray* C,
bool cNcont = N == 1 || C->strideAt(-1) == 1;
if(!aMcont && !aKcont) {
pA = A->dup('c');
pA = new NDArray(A->dup('c'));
toDelete.push_back(pA);
aKcont = true;
}
if(!bKcont && !bNcont) {
pB = B->dup('c');
pB = new NDArray(B->dup('c'));
toDelete.push_back(pB);
bNcont = true;
}
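A hedged sketch of the ownership change behind these hunks: `dup()` now returns an `NDArray` by value, so code that still needs a raw pointer (to track in `toDelete`) wraps the copy in `new NDArray(...)`. The helper name below is made up for illustration; the libnd4j header path is assumed.

```cpp
#include <vector>
// #include <NDArray.h>   // assumed libnd4j header

// Before: NDArray* pA = A->dup('f');   // dup() used to return a heap pointer
// After:  dup() returns by value; heap-allocate explicitly when a tracked
//         pointer is required, as mmulMxM does with its toDelete list.
nd4j::NDArray* makeContiguousCopy(const nd4j::NDArray* A,
                                  std::vector<nd4j::NDArray*>& toDelete) {
    auto* pA = new nd4j::NDArray(A->dup('f'));   // constructs from the temporary copy
    toDelete.push_back(pA);                      // freed later together with the rest
    return pA;
}
```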

View File

@ -82,7 +82,7 @@ namespace nd4j {
// now we actually apply quantization
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e += increment) {
rz[e] = static_cast<char>(nd4j::math::nd4j_round<float, char>(1.0f * x[e] / nd4j::math::nd4j_max<float>(amax, amin) * max_byte));
rz[e] = static_cast<char>(nd4j::math::nd4j_round<float, char>( 1.0f * static_cast<float>(x[e]) / nd4j::math::nd4j_max<float>(amax, amin) * max_byte));
}
};
@ -180,7 +180,7 @@ PRAGMA_OMP_ATOMIC_ARGS(write)
for (auto e = start; e < stop; e += increment) {
int el = x[e];
int ael = nd4j::math::nd4j_abs<int>(el) - 1;
z[ael] += el > 0 ? threshold : -threshold;
z[ael] += el > 0 ? static_cast<T>(threshold) : static_cast<T>(-threshold);
}
};
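These two hunks add explicit `static_cast`s because, after the float16/bfloat16 template cleanup, mixed arithmetic between half-precision element types and plain `float`/`int` no longer leans on implicit conversions. A minimal illustration of the second hunk's shape (standalone sketch, abs computed inline):

```cpp
// Sketch only: T may be float16/bfloat16, which now require explicit casts.
template <typename T>
void accumulateThreshold(T* z, const int* x, int length, float threshold) {
    for (int e = 0; e < length; e++) {
        int el  = x[e];
        int ael = (el < 0 ? -el : el) - 1;               // abs index, as in the hunk
        z[ael] += el > 0 ? static_cast<T>(threshold)     // explicit cast keeps half
                         : static_cast<T>(-threshold);   // types compiling cleanly
    }
}
```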

View File

@ -32,21 +32,19 @@ namespace nd4j {
REQUIRE_TRUE(x->isR(), 0, "CRELU: input must be real type");
auto tmp = x->dup();
tmp->applyTransform(nd4j::transform::Neg, nullptr, nullptr);
tmp.applyTransform(nd4j::transform::Neg, tmp);
auto z = OUTPUT_VARIABLE(0);
helpers::concat(block.launchContext(), {x, tmp}, *z, x->rankOf()-1);
helpers::concat(block.launchContext(), {x, &tmp}, *z, x->rankOf()-1);
// NDArrayFactory<T>::concat({x, tmp}, -1, z);
// TODO: make this configurable?
double threshold = 0.0;
z->applyScalar(nd4j::scalar::RELU, threshold);
z->applyScalar(nd4j::scalar::RELU, threshold, *z);
STORE_RESULT(z);
delete tmp;
return Status::OK();
}
@ -94,7 +92,7 @@ namespace nd4j {
auto pos = dec->at(0);
auto neg = dec->at(1);
pos->applyPairwiseTransform(nd4j::pairwise::Subtract, neg, epsilon, nullptr);
pos->applyPairwiseTransform(nd4j::pairwise::Subtract, *neg, *epsilon);
delete tmpResult;
delete dec;


View File

@ -31,7 +31,7 @@ namespace nd4j {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
input->applyTransform(nd4j::transform::Cube, output, nullptr);
input->applyTransform(nd4j::transform::Cube, *output);
STORE_RESULT(output);
return Status::OK();
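Most of the activation-op hunks that follow are the same migration: `applyTransform`/`applyScalar` now take the destination as an `NDArray&` and drop the trailing extra-params pointer. A minimal sketch with `input`/`output` standing in for the op's variables (libnd4j namespaces assumed):

```cpp
// Before: input->applyTransform(nd4j::transform::Cube, output, nullptr);
// After:  the destination is passed by reference, no nullptr placeholder.
void applyCube(nd4j::NDArray* input, nd4j::NDArray* output) {
    input->applyTransform(nd4j::transform::Cube, *output);   // out-of-place transform
}
// applyScalar follows the same shape:
//   input->applyScalar(nd4j::scalar::ELU, alpha, *output);
```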

View File

@ -32,7 +32,7 @@ namespace nd4j {
const auto alpha = block.numT() > 0 ? T_ARG(0) : 1.f;
input->applyScalar(nd4j::scalar::ELU, alpha, output);
input->applyScalar(nd4j::scalar::ELU, alpha, *output);
return Status::OK();
}

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
input->applyTransform(nd4j::transform::HardSigmoid, output, nullptr);
input->applyTransform(nd4j::transform::HardSigmoid, *output);
STORE_RESULT(output);
return Status::OK();

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
input->applyTransform(nd4j::transform::HardTanh, output, nullptr);
input->applyTransform(nd4j::transform::HardTanh, *output);
STORE_RESULT(output);
return Status::OK();

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto z = this->getZ(block);
// just for lulz
first->applyTransform(nd4j::transform::Identity, z, nullptr);
first->applyTransform(nd4j::transform::Identity, *z);
STORE_RESULT(*z);

View File

@ -33,7 +33,7 @@ namespace nd4j {
auto x = INPUT_VARIABLE(i);
auto z = OUTPUT_VARIABLE(i);
x->applyTransform(transform::Identity, z, nullptr);
x->applyTransform(transform::Identity, *z);
}
}

View File

@ -31,7 +31,7 @@ namespace nd4j {
float alpha = block.numT() > 0 ? T_ARG(0) : 0.01f;
input->applyScalar(nd4j::scalar::LeakyRELU, alpha, output);
input->applyScalar(nd4j::scalar::LeakyRELU, alpha, *output);
STORE_RESULT(output);
return Status::OK();

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
input->applyTransform(nd4j::transform::RationalTanh, output, nullptr);
input->applyTransform(nd4j::transform::RationalTanh, *output);
STORE_RESULT(output);
return Status::OK();

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
input->applyTransform(nd4j::transform::RectifiedTanh, output, nullptr);
input->applyTransform(nd4j::transform::RectifiedTanh, *output);
STORE_RESULT(output);
return Status::OK();

View File

@ -32,7 +32,7 @@ namespace nd4j {
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;
first->applyScalar(nd4j::scalar::RELU, scalar, z);
first->applyScalar(nd4j::scalar::RELU, scalar, *z);
STORE_RESULT(*z);

View File

@ -33,7 +33,7 @@ CONFIGURABLE_OP_IMPL(relu6, 1, 1, true, 1, 0) {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
input->applyScalar(nd4j::scalar::RELU6, T_ARG(0), output);
input->applyScalar(nd4j::scalar::RELU6, T_ARG(0), *output);
return Status::OK();
}

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto first = INPUT_VARIABLE(0);
auto z = OUTPUT_VARIABLE(0);
first->applyTransform(nd4j::transform::SELU, z, nullptr);
first->applyTransform(nd4j::transform::SELU, *z);
STORE_RESULT(*z);

View File

@ -29,7 +29,7 @@ namespace nd4j {
auto first = INPUT_VARIABLE(0);
auto z = OUTPUT_VARIABLE(0);
first->applyTransform(nd4j::transform::Sigmoid, z, nullptr);
first->applyTransform(nd4j::transform::Sigmoid, *z);
STORE_RESULT(*z);

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto first = INPUT_VARIABLE(0);
auto z = OUTPUT_VARIABLE(0);
first->applyTransform(nd4j::transform::SoftPlus, z, nullptr);
first->applyTransform(nd4j::transform::SoftPlus, *z);
STORE_RESULT(*z);

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto first = INPUT_VARIABLE(0);
auto z = OUTPUT_VARIABLE(0);
first->applyTransform(nd4j::transform::SoftSign, z, nullptr);
first->applyTransform(nd4j::transform::SoftSign, *z);
STORE_RESULT(*z);

View File

@ -30,7 +30,7 @@ namespace nd4j {
auto first = INPUT_VARIABLE(0);
auto z = OUTPUT_VARIABLE(0);
first->applyTransform(nd4j::transform::Tanh, z, nullptr);
first->applyTransform(nd4j::transform::Tanh, *z);
STORE_RESULT(*z);

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntAnd, pairwise::IntOps::IntAnd, broadcast::IntOps::IntAnd), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntAnd, pairwise::IntOps::IntAnd, broadcast::IntOps::IntAnd), *y, *z, false);
return Status::OK();
}
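The broadcastable ops get the analogous change: `applyTrueBroadcast` takes the second operand and the destination by reference instead of by pointer. Sketch, using the op tuple from the hunk above (the meaning of the final flag is taken from context):

```cpp
// Before: x->applyTrueBroadcast(tuple, y, z, false);
// After:  operands and destination are dereferenced at the call site.
void broadcastIntAnd(nd4j::NDArray* x, nd4j::NDArray* y, nd4j::NDArray* z) {
    auto t = nd4j::BroadcastIntOpsTuple::custom(nd4j::scalar::IntOps::IntAnd,
                                                nd4j::pairwise::IntOps::IntAnd,
                                                nd4j::broadcast::IntOps::IntAnd);
    x->applyTrueBroadcast(t, *y, *z, false);   // last flag kept as in the op above
}
```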

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntOr, pairwise::IntOps::IntOr, broadcast::IntOps::IntOr), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntOr, pairwise::IntOps::IntOr, broadcast::IntOps::IntOr), *y, *z, false);
return Status::OK();
}

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntXor, pairwise::IntOps::IntXor, broadcast::IntOps::IntXor), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::IntOps::IntXor, pairwise::IntOps::IntXor, broadcast::IntOps::IntXor), *y, *z, false);
return Status::OK();
}

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftRight, pairwise::CyclicShiftRight, broadcast::CyclicShiftRight), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftRight, pairwise::CyclicShiftRight, broadcast::CyclicShiftRight), *y, *z, false);
return Status::OK();
}

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftLeft, pairwise::CyclicShiftLeft, broadcast::CyclicShiftLeft), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::CyclicShiftLeft, pairwise::CyclicShiftLeft, broadcast::CyclicShiftLeft), *y, *z, false);
return Status::OK();
}

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftRight, pairwise::ShiftRight, broadcast::ShiftRight), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftRight, pairwise::ShiftRight, broadcast::ShiftRight), *y, *z, false);
return Status::OK();
}

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftLeft, pairwise::ShiftLeft, broadcast::ShiftLeft), y, z, false);
x->applyTrueBroadcast(BroadcastIntOpsTuple::custom(scalar::ShiftLeft, pairwise::ShiftLeft, broadcast::ShiftLeft), *y, *z, false);
return Status::OK();
}

View File

@ -44,7 +44,7 @@ namespace nd4j {
ExtraArguments arguments({a});
y->applyPairwiseTransform(pairwise::Axpy, x, z, &arguments);
y->applyPairwiseTransform(pairwise::Axpy, *x, *z, &arguments);
return ND4J_STATUS_OK;
}

View File

@ -33,8 +33,12 @@ CUSTOM_OP_IMPL(svd, 1, 1, false, 0, 3) {
const int rank = x->rankOf();
REQUIRE_TRUE(rank >= 2 , 0, "SVD OP: the rank of input array must be >=2, but got %i instead!", rank);
const bool fullUV = (bool)INT_ARG(0);
bool fullUV = (bool)INT_ARG(0);
const bool calcUV = (bool)INT_ARG(1);
if(calcUV == false)
fullUV = false;
const int switchNum = INT_ARG(2);
// #ifndef __CUDABLAS__

View File

@ -29,7 +29,7 @@ namespace nd4j {
auto x = INPUT_VARIABLE(0);
auto z = OUTPUT_VARIABLE(0);
x->applyTransform(transform::Not, z, nullptr);
x->applyTransform(transform::Not, *z);
return Status::OK();
}

View File

@ -70,17 +70,13 @@ namespace nd4j {
auto tadsY = y->allTensorsAlongDimension(dims);
auto tadsZ = z->allTensorsAlongDimension(dims);
for (int e = 0; e < tadsX->size(); e++) {
for (int e = 0; e < tadsX.size(); e++) {
if (!cond->e<bool>(e)) {
tadsZ->at(e)->assign(tadsY->at(e));
tadsZ.at(e)->assign(tadsY.at(e));
} else {
tadsZ->at(e)->assign(tadsX->at(e));
tadsZ.at(e)->assign(tadsX.at(e));
}
}
delete tadsX;
delete tadsY;
delete tadsZ;
}
}
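These hunks can drop the trailing `delete tadsX/Y/Z` because `allTensorsAlongDimension` now returns a `ResultSet` by value; `.size()` and `.at(e)` replace the old pointer syntax. A minimal sketch of the new usage:

```cpp
#include <vector>

// Before: auto tads = arr->allTensorsAlongDimension(dims);   // ResultSet*
//         ... tads->at(e) ... ; delete tads;
// After:  ResultSet is a value and releases its sub-array views on destruction.
void copyTads(nd4j::NDArray* x, nd4j::NDArray* z, const std::vector<int>& dims) {
    auto tadsX = x->allTensorsAlongDimension(dims);
    auto tadsZ = z->allTensorsAlongDimension(dims);
    for (int e = 0; e < tadsX.size(); e++)
        tadsZ.at(e)->assign(tadsX.at(e));   // copy each tensor-along-dimension slice
    // no delete needed
}
```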

View File

@ -59,17 +59,13 @@ namespace nd4j {
auto tadsY = y->allTensorsAlongDimension(dims);
auto tadsZ = z->allTensorsAlongDimension(dims);
for (int e = 0; e < tadsX->size(); e++) {
for (int e = 0; e < tadsX.size(); e++) {
if (!condition->e<bool>(e)) {
tadsZ->at(e)->assign(tadsY->at(e));
tadsZ.at(e)->assign(tadsY.at(e));
} else {
tadsZ->at(e)->assign(tadsX->at(e));
tadsZ.at(e)->assign(tadsX.at(e));
}
}
delete tadsX;
delete tadsY;
delete tadsZ;
}
} else {
// in this case we return a 2D matrix, which basically contains the coordinates of true elements

View File

@ -89,16 +89,12 @@ namespace nd4j {
auto tadsY = y->allTensorsAlongDimension(dims);
auto tadsZ = z->allTensorsAlongDimension(dims);
for (int e = 0; e < tadsX->size(); e++) {
for (int e = 0; e < tadsX.size(); e++) {
if (!condition->e<bool>(e))
tadsZ->at(e)->assign(tadsY->at(e));
tadsZ.at(e)->assign(tadsY.at(e));
else
tadsZ->at(e)->assign(tadsX->at(e));
tadsZ.at(e)->assign(tadsX.at(e));
}
delete tadsX;
delete tadsY;
delete tadsZ;
}
} else {
// in this case we return a 2D matrix, which basically contains the coordinates of true elements

View File

@ -82,14 +82,12 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(epsNext);
if (axisY.size() > 0) {
auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(epsNext);
}
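The recurring pattern in this and the following gradient hunks: `reduceAlongDimension` returns the reduced `NDArray` by value, so the result is assigned directly and the `delete sum;` lines disappear. Sketch:

```cpp
#include <vector>

// Before: auto sum = epsNext->reduceAlongDimension(reduce::Sum, axes);   // NDArray*
//         gradX->assign(sum); delete sum;
// After:  the reduction is a value; assign it and let it go out of scope.
void reduceIntoGrad(nd4j::NDArray* epsNext, nd4j::NDArray* gradX,
                    const std::vector<int>& axisX) {
    if (!axisX.empty()) {
        auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisX);
        gradX->assign(sum);        // copy the reduced values into the gradient
    } else {
        gradX->assign(epsNext);    // shapes already match, plain copy
    }
}
```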

View File

@ -80,7 +80,6 @@ namespace nd4j {
if (axisY.size() > 0) {
auto sum = epsNext->reduceAlongDimension(nd4j::reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(epsNext);
}

View File

@ -36,7 +36,7 @@ BROADCASTABLE_OP_IMPL(tf_atan2, 0, 0) {
BROADCAST_CHECK_EMPTY(x,y,z);
// auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Atan2<T>>(y, x, z);
x->applyTrueBroadcast(nd4j::BroadcastOpsTuple::custom(scalar::Atan2, pairwise::Atan2, broadcast::Atan2), y, z, true);
x->applyTrueBroadcast(nd4j::BroadcastOpsTuple::custom(scalar::Atan2, pairwise::Atan2, broadcast::Atan2), *y, *z, true);
// if (tZ == nullptr)
// return ND4J_STATUS_KERNEL_FAILURE;

View File

@ -81,7 +81,7 @@ namespace nd4j {
// Y gradient
//epsNext->applyTriplewiseLambda(x, y, lambdaY, gradY);
gradY->assign((*epsNext) * (*x) / ((*y) * (*y)));
gradY->applyTransform(transform::Neg, nullptr, nullptr);
gradY->applyTransform(transform::Neg, *gradY);
} else if (y->isScalar()) {
// scalar case
@ -91,17 +91,17 @@ namespace nd4j {
//tmpX.printBuffer("SumX");
//tmp.printBuffer("Sum Eps");
gradY->assign(tmp * tmpX / ((*y) * (*y)));
gradY->applyTransform(transform::Neg, nullptr, nullptr);
gradY->applyTransform(transform::Neg, *gradY);
//epsNext->applyLambda(lambdaS, gradX);
epsNext->applyScalarArr(scalar::Divide, y, gradX, nullptr);
//epsNext->applyLambda(lambdaS, *gradX);
epsNext->applyScalarArr(scalar::Divide, *y, *gradX);
} else {
// broadcast case
auto preX = *epsNext / *y;
NDArray negX(*x);
x->applyTransform(transform::Neg, &negX);
x->applyTransform(transform::Neg, negX);
auto preY = *epsNext * negX / ((*y) * (*y));
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
@ -110,14 +110,12 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(preX);
if (axisY.size() > 0) {
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(preY);
}

View File

@ -69,7 +69,7 @@ namespace nd4j {
std::unique_ptr<ResultSet> tmpResult(op.execute({x, y}, {}, {}, {}));
if (gradY->rankOf() == gradX->rankOf())
epsNext->applyPairwiseTransform(pairwise::Multiply, tmpResult->at(0), gradY, nullptr);
epsNext->applyPairwiseTransform(pairwise::Multiply, *tmpResult->at(0), *gradY);
else // epsNext is greater than gradY
{
std::vector<Nd4jLong> dims(epsNext->rankOf() * 2);
@ -78,7 +78,7 @@ namespace nd4j {
dims[d * 2 + 1] = 1;
}
auto tempIn((*tmpResult->at(0))(dims));
(*epsNext)(dims).applyPairwiseTransform(pairwise::Multiply, &tempIn, gradY, nullptr);
(*epsNext)(dims).applyPairwiseTransform(pairwise::Multiply, tempIn, *gradY);
}
return Status::OK();
}

View File

@ -79,24 +79,24 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
const Nd4jLong yLen = y->lengthOf();
if(x->isScalar() && y->isScalar()) { // both are scalars
y->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdx, nullptr);
x->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdy, nullptr);
y->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx);
x->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy);
//dLdx->assign((*y) * (*dLdz));
//dLdy->assign((*x) * (*dLdz));
}
else if(x->isScalar()) { // x is scalar and y is not
dLdx->assign((*y * *dLdz).reduceNumber(reduce::Sum));
dLdz->applyScalarArr(scalar::Multiply, x, dLdy, nullptr);
dLdz->applyScalarArr(scalar::Multiply, *x, *dLdy);
//dLdz->applyTrueBroadcast(broadcast::Multiply, x, dLdy, true);
}
else if(y->isScalar()) { // y is scalar and x is not
dLdy->assign((*x * *dLdz).reduceNumber(reduce::Sum));
dLdz->applyScalarArr(scalar::Multiply, y, dLdx);
dLdz->applyScalarArr(scalar::Multiply, *y, *dLdx);
}
else if(x->isSameShape(y)) {
x->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdy, nullptr);
y->applyPairwiseTransform(pairwise::Multiply, dLdz, dLdx, nullptr);
x->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy);
y->applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx);
}
else if (x->isSameShape(dLdz)) {
@ -104,8 +104,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
y->tile(yTiled);
std::vector<int> axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo());
dLdy->assign( (*x * *dLdz).reduceAlongDims(reduce::Sum, axesForY) );
yTiled.applyPairwiseTransform(pairwise::Multiply, dLdz, dLdx, nullptr);
dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) );
yTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx);
}
else if (y->isSameShape(dLdz)) {
@ -113,8 +113,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
x->tile(xTiled);
std::vector<int> axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo());
dLdx->assign( (*y * *dLdz).reduceAlongDims(reduce::Sum, axesForX) );
xTiled.applyPairwiseTransform(pairwise::Multiply, dLdz, dLdy, nullptr);
dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) );
xTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy);
}
else {
@ -125,8 +125,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) {
std::vector<int> axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo());
std::vector<int> axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo());
dLdx->assign( (*y * *dLdz).reduceAlongDims(reduce::Sum, axesForX) );
dLdy->assign( (*x * *dLdz).reduceAlongDims(reduce::Sum, axesForY) );
dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) );
dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) );
}
return Status::OK();
@ -182,7 +182,7 @@ DECLARE_SHAPE_FN(multiply_bp) {
T tmpX = x->template reduceNumber<simdOps::Sum<T>>();
gradY->assign(tmpX);
epsNext->applyLambda(lambdaS, gradX);
epsNext->applyLambda(lambdaS, *gradX);
} else {
// broadcast case

View File

@ -71,7 +71,7 @@ namespace nd4j {
// X gradient
//epsNext->applyPairwiseLambda(y, lambdaX, gradX);
epsNext->applyPairwiseTransform(pairwise::Divide, y, gradX, nullptr);
epsNext->applyPairwiseTransform(pairwise::Divide, *y, *gradX);
// Y gradient
//epsNext->applyTriplewiseLambda(x, y, lambdaY, gradY);
@ -86,14 +86,14 @@ namespace nd4j {
gradY->assign(tmp * -tmpX / ((*y) * (*y)));
//epsNext->applyLambda(lambdaS, gradX);
epsNext->applyScalarArr(scalar::Divide, y, gradX, nullptr);
epsNext->applyScalarArr(scalar::Divide, *y, *gradX);
} else {
// broadcast case
auto preX = *epsNext / *y;
NDArray negX(*x);
x->applyTransform(transform::Neg, &negX);
x->applyTransform(transform::Neg, negX);
auto preY = *epsNext * negX / ((*y) * (*y));
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
@ -102,14 +102,12 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(preX);
if (axisY.size() > 0) {
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(preY);
}

View File

@ -34,7 +34,7 @@ namespace nd4j {
BROADCAST_CHECK_EMPTY(x,y,z);
REQUIRE_TRUE(!x->isB(), 0, "REVERSEDIVIDE OP: you can't divide by bool array!");
x->applyTrueBroadcast(BROADCAST(ReverseDivide), y, z, true);
x->applyTrueBroadcast(BROADCAST(ReverseDivide), *y, *z, true);
return Status::OK();
}
@ -67,7 +67,7 @@ namespace nd4j {
// X gradient
//epsNext->applyTriplewiseLambda(x, y, lambdaX, gradX);
gradX->assign((*epsNext) * (*y) / ((*x) * (*x)));
gradX->applyTransform(transform::Neg, nullptr, nullptr);
gradX->applyTransform(transform::Neg, *gradX);
// Y gradient
//epsNext->applyPairwiseLambda(x, lambdaY, gradY);
gradY->assign((*epsNext) / (*x));
@ -78,14 +78,14 @@ namespace nd4j {
gradY->assign(tmp / tmpX);
gradX->assign((*epsNext) * (*y) / ((*x) * (*x)));
gradX->applyTransform(transform::Neg, nullptr, nullptr);
gradX->applyTransform(transform::Neg, *gradX);
} else {
// broadcast case
auto preY = (*epsNext) / (*x);
auto preX = *epsNext * (*y) / ((*x) * (*x));
preX.applyTransform(transform::Neg, nullptr, nullptr);
preX.applyTransform(transform::Neg, preX);
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
auto axisY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), epsNext->shapeInfo());
@ -93,14 +93,12 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(preX);
if (axisY.size() > 0) {
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(preY);
}

View File

@ -61,13 +61,13 @@ namespace nd4j {
if (x->isSameShape(y)) {
// PWT case
epsNext->applyTransform(transform::Neg, gradX, nullptr);
epsNext->applyTransform(transform::Neg, *gradX);
gradY->assign(epsNext);
} else if (y->isScalar()) {
// scalar case
auto tmp = epsNext->reduceNumber(reduce::Sum);
gradY->assign(tmp);
epsNext->applyTransform(transform::Neg, gradX, nullptr);
epsNext->applyTransform(transform::Neg, *gradX);
} else {
// broadcastable
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
@ -75,16 +75,14 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisX);
sum->applyTransform(transform::Neg, gradX);
delete sum;
sum.applyTransform(transform::Neg, *gradX);
} else {
epsNext->applyTransform(transform::Neg, gradX, nullptr);
epsNext->applyTransform(transform::Neg, *gradX);
}
if (axisY.size() > 0) {
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else {
gradY->assign(epsNext);
}

View File

@ -98,37 +98,31 @@ namespace nd4j {
auto targetShape = epsNext->getShapeAsVector();
preX->tileToShape(targetShape);
preY->tileToShape(targetShape);
preX.tileToShape(targetShape, preX);
preY.tileToShape(targetShape, preY);
//epsNext->applyTriplewiseLambda(x, y, lambdaX, preX);
//epsNext->applyTriplewiseLambda(x, y, lambdaY, preY);
auto resX = (*epsNext) * ts * ((*x) - (*y));
preX->assign(resX);
preX.assign(resX);
auto resY = (*epsNext) * ts * ((*y) - (*x));
preY->assign(resY);
preY.assign(resY);
auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo());
auto axisY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), epsNext->shapeInfo());
if (axisX.size() > 0) {
auto sum = preX->reduceAlongDimension(reduce::Sum, axisX);
auto sum = preX.reduceAlongDimension(reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(preX);
if (axisY.size() > 0) {
auto sum = preY->reduceAlongDimension(reduce::Sum, axisY);
auto sum = preY.reduceAlongDimension(reduce::Sum, axisY);
gradY->assign(sum);
delete sum;
} else
gradY->assign(preY);
delete preX;
delete preY;
}
return Status::OK();

View File

@ -62,7 +62,7 @@ namespace nd4j {
if (x->isSameShape(y)) {
// PWT case
epsNext->applyTransform(transform::Neg, gradY, nullptr);
epsNext->applyTransform(transform::Neg, *gradY);
gradX->assign(epsNext);
} else if (y->isScalar()) {
// scalar case
@ -77,16 +77,14 @@ namespace nd4j {
if (axisX.size() > 0) {
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisX);
gradX->assign(sum);
delete sum;
} else
gradX->assign(epsNext);
if (axisY.size() > 0) {
auto sum = epsNext->reduceAlongDimension(reduce::Sum, axisY);
sum->applyTransform(transform::Neg, gradY);
delete sum;
sum.applyTransform(transform::Neg, *gradY);
} else {
epsNext->applyTransform(transform::Neg, gradY);
epsNext->applyTransform(transform::Neg, *gradY);
}
}

View File

@ -41,10 +41,10 @@ namespace nd4j {
// but we'll ensure only one node is active, and the other is disabled
if (condition->e<int>(0) == 0) {
block.setBranch(0);
this->storeResult(block, 0, input->dup());
this->storeResult(block, 0, new NDArray(input->dup()));
} else {
block.setBranch(1);
this->storeResult(block, 1, *input->dup());
this->storeResult(block, 1, new NDArray(input->dup()));
}
return Status::OK();

View File

@ -42,34 +42,34 @@ namespace nd4j {
std::unique_ptr<NDArray> ptr;
if (!Environment::getInstance()->isExperimentalBuild()) {
if (y->dataType() != x->dataType()) {
y = y->cast(x->dataType());
y = new NDArray(y->cast(x->dataType()));
std::unique_ptr<NDArray> ptr2(y);
ptr.swap(ptr2);
}
}
if (!x->isScalar() && !y->isScalar() && x->isSameShape(y)) {
x->applyPairwiseTransform(op.p, y, z, nullptr);
x->applyPairwiseTransform(op.p, *y, *z);
} else if (!x->isScalar() && y->isScalar()) {
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z);
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
} else if (x->isScalar() && !y->isScalar()) {
if (z->isSameShape(y)) {
if (op.s == scalar::Add || op.s == scalar::Multiply ) {
y->applyScalarArr(op.s, x, z, nullptr);
y->applyScalarArr(op.s, *x, *z);
} else if (op.s == scalar::SquaredSubtract) {
y->applyScalarArr(scalar::SquaredReverseSubtract, x, z, nullptr);
y->applyScalarArr(scalar::SquaredReverseSubtract, *x, *z);
} else if (op.s == scalar::Subtract) {
y->applyScalarArr(scalar::ReverseSubtract, x, z, nullptr);
y->applyScalarArr(scalar::ReverseSubtract, *x, *z);
} else if (op.s == scalar::Divide) {
y->applyScalarArr(scalar::ReverseDivide, x, z, nullptr);
y->applyScalarArr(scalar::ReverseDivide, *x, *z);
} else if (op.s == scalar::Pow) {
y->applyScalarArr(scalar::ReversePow, x, z, nullptr);
y->applyScalarArr(scalar::ReversePow, *x, *z);
} else if (op.s == scalar::ReverseSubtract) {
y->applyScalarArr(scalar::Subtract, x, z, nullptr);
y->applyScalarArr(scalar::Subtract, *x, *z);
} else if (op.s == scalar::ReverseDivide) {
y->applyScalarArr(scalar::Divide, x, z, nullptr);
y->applyScalarArr(scalar::Divide, *x, *z);
} else if (op.s == scalar::MaxPairwise || op.s == scalar::MinPairwise || op.s == scalar::AMaxPairwise || op.s == scalar::AMinPairwise) {
y->applyScalarArr(op.s, x, z, nullptr);
y->applyScalarArr(op.s, *x, *z);
} else if (op.s == scalar::CopyPws) {
z->assign(y);
} else {
@ -84,9 +84,9 @@ namespace nd4j {
return tZ;
}
} else if (x->isScalar() && y->isScalar()) { // x->isScalar() && y->isScalar()
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z, nullptr);
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
} else if (ShapeUtils::areShapesBroadcastable(*x, *y)) {
x->applyTrueBroadcast(op, y, z, true, extraArgs);
x->applyTrueBroadcast(op, *y, *z, true, extraArgs);
return z;
} else {
auto sx = ShapeUtils::shapeAsString(x);
@ -107,16 +107,16 @@ namespace nd4j {
}
if (!x->isScalar() && !y->isScalar() && x->isSameShape(y)) {
x->applyPairwiseTransform(op.p, y, z, nullptr);
x->applyPairwiseTransform(op.p, *y, *z);
} else if (ShapeUtils::areShapesBroadcastable(*x, *y)) {
x->applyTrueBroadcast(op, y, z, true, extraArgs);
x->applyTrueBroadcast(op, *y, *z, true, extraArgs);
return z;
} else if (!x->isScalar() && y->isScalar()) {
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z);
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
} else if (x->isScalar() && !y->isScalar()) {
if (z->isSameShape(y)) {
//z->assign(x);
x->applyPairwiseTransform(op.p, y, z, extraArgs);
x->applyPairwiseTransform(op.p, *y, *z, extraArgs);
return z;
} else {
auto v = y->getShapeAsVector();
@ -125,9 +125,9 @@ namespace nd4j {
return tZ;
}
} else if (x->isScalar() && y->isScalar()) { // x->isScalar() && y->isScalar()
x->applyScalarArr(op.s, const_cast<const NDArray*>(y), z, nullptr);
x->applyScalarArr(op.s, const_cast<const NDArray&>(*y), *z);
} else if (ShapeUtils::areShapesBroadcastable(*x, *y)) {
x->applyTrueBroadcast(op, y, z, true, extraArgs);
x->applyTrueBroadcast(op, *y, *z, true, extraArgs);
return z;
} else {
auto sx = ShapeUtils::shapeAsString(x);

View File

@ -51,12 +51,12 @@ namespace nd4j {
std::vector<int> axis = ShapeUtils::evalDimsToExclude(array->rankOf(), {0});
auto tads = array->allTensorsAlongDimension( axis);
for (int e = 0; e < tads->size(); e++) {
for (int e = 0; e < tads.size(); e++) {
auto idx = indices->e<int>(e);
if (idx >= tads->size())
if (idx >= tads.size())
return ND4J_STATUS_BAD_ARGUMENTS;
auto arr = tads->at(e)->dup(array->ordering());
auto arr = new NDArray(tads.at(e)->dup(array->ordering()));
auto res = list->write(idx, arr);
if (res != ND4J_STATUS_OK)
return res;
@ -65,7 +65,6 @@ namespace nd4j {
if (!hasList)
//OVERWRITE_RESULT(list);
setupResultList(list, block);
delete tads;
return Status::OK();
}

View File

@ -66,7 +66,7 @@ namespace nd4j {
auto subarray = (*array)(indices);
auto status = list->write(e, subarray.dup(array->ordering()));
auto status = list->write(e, new NDArray(subarray.dup(array->ordering())));
if (status != ND4J_STATUS_OK)
return status;

View File

@ -39,7 +39,7 @@ namespace nd4j {
//nd4j_printf("Writing [%i]:\n", idx->e<int>(0));
//input->printShapeInfo("input shape");
//input->printIndexedBuffer("input buffer");
Nd4jStatus result = list->write(idx->e<int>(0), input->dup());
Nd4jStatus result = list->write(idx->e<int>(0), new NDArray(input->dup()));
auto res = NDArrayFactory::create_(list->counter(), block.launchContext());
//res->printShapeInfo("Write_list 2 output shape");
@ -52,7 +52,7 @@ namespace nd4j {
auto input = INPUT_VARIABLE(1);
auto idx = INT_ARG(0);
Nd4jStatus result = list->write(idx, input->dup());
Nd4jStatus result = list->write(idx, new NDArray(input->dup()));
auto res = NDArrayFactory::create_(list->counter(), block.launchContext());
//res->printShapeInfo("Write_list 1 output shape");

View File

@ -169,10 +169,10 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
NDArray E = *predictions - *labels;
// dE_i/dp_i = sign(p_i - y_i)
E.applyTransform(nd4j::transform::Sign, dLdp); // dE/dp
E.applyTransform(nd4j::transform::Sign, *dLdp); // dE/dp
// dE_i/dy_i = -sign(p_i - y_i)
E.applyTransform(nd4j::transform::Abs);
E.applyTransform(nd4j::transform::Abs, E);
switch (reductionMode) {
@ -184,7 +184,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -210,7 +210,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -238,7 +238,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -56,7 +56,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss, 3, 1, false, 0, 2) {
REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(*weights, *output), 0, "COSINE_DISTANCE_LOSS OP: shapes of weights and output arrays should be broadcastable, but got weights = %s and output = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(labels).c_str());
}
NDArray E = 1. - (*predictions * *labels).reduceAlongDims(reduce::Sum, {dim}, true);
NDArray E = 1. - (*predictions * *labels).reduceAlongDimension(reduce::Sum, {dim}, true);
// perform weights broadcasting/tile to E if it is necessary
auto weightsBroad = weights;
@ -194,7 +194,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
// input dimension can't be larger than labels/predictions/weights rank
REQUIRE_TRUE(dim < labels->rankOf(), 0, "COSINE_DISTANCE_LOSS_GRAD OP: input reduction dimension (got %i) must be < labels rank %i!", dim, labels->rankOf());
NDArray E = 1. - (*predictions * *labels).reduceAlongDims(reduce::Sum, {dim}, true);
NDArray E = 1. - (*predictions * *labels).reduceAlongDimension(reduce::Sum, {dim}, true);
// perform weights broadcasting/tile to E if it is necessary
auto weightsBroad = weights;
@ -216,7 +216,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
else {
if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -249,7 +249,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -284,7 +284,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) {
if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeights;
}
else

View File

@ -52,7 +52,7 @@ namespace nd4j {
// We first need to convert binary labels to -1/1 labels (as floats)
NDArray E = 1.f - (*labels * 2.f - 1.f) * (*logits);
E.applyScalar(scalar::RELU, 0.0f, &E);
E.applyScalar(scalar::RELU, 0.0f, E);
// multiply E on weights
E *= *weightsBroad;
@ -172,11 +172,11 @@ namespace nd4j {
NDArray z = (*labels * 2.f - 1.f);
NDArray E = 1.f - z * (*logits);
E.applyScalar(scalar::RELU, 0.0f, &E);
E.applyScalar(scalar::RELU, 0.0f, E);
// turn E into gradient mask
NDArray gradientMask(E.getShapeInfo(), block.getWorkspace());
E.applyTransform(nd4j::transform::Sign, &gradientMask);
E.applyTransform(nd4j::transform::Sign, gradientMask);
dLdp->assign(-z * gradientMask);
dLdl->assign(-2.f * (*logits) * gradientMask);
@ -192,7 +192,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -220,7 +220,7 @@ namespace nd4j {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -249,7 +249,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -53,9 +53,9 @@ CUSTOM_OP_IMPL(huber_loss, 3, 1, false, 1, 1) {
weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo()));
auto error = *predictions - *labels;
error.applyTransform(transform::Abs);
error.applyTransform(transform::Abs, error);
NDArray quadratic(error.getShapeInfo(), block.getWorkspace());
error.applyScalar(scalar::MinPairwise, delta, &quadratic);
error.applyScalar(scalar::MinPairwise, delta, quadratic);
NDArray E = quadratic * quadratic * 0.5f + (error - quadratic)*delta;
@ -173,24 +173,24 @@ DECLARE_SHAPE_FN(huber_loss) {
NDArray diff = *predictions - *labels;
NDArray absDiff(diff);
absDiff.applyTransform(transform::Abs);
absDiff.applyTransform(transform::Abs, absDiff);
NDArray quadratic(absDiff);
absDiff.applyScalar(scalar::MinPairwise, delta, &quadratic);
absDiff.applyScalar(scalar::MinPairwise, delta, quadratic);
NDArray E = quadratic * quadratic * 0.5f + (absDiff - quadratic)*delta;
NDArray lteMask(diff.getShapeInfo(), BOOL, true, block.launchContext());
absDiff.applyScalar(scalar::LessThanOrEqual, delta, &lteMask);
absDiff.applyScalar(scalar::LessThanOrEqual, delta, lteMask);
NDArray gtMask(diff.getShapeInfo(), BOOL, true, block.launchContext());
absDiff.applyScalar(scalar::GreaterThan, delta, &gtMask);
absDiff.applyScalar(scalar::GreaterThan, delta, gtMask);
NDArray signDiff(diff);
diff.applyTransform(transform::Sign, &signDiff);
diff.applyTransform(transform::Sign, signDiff);
auto gtMaskFloat = *gtMask.cast(diff.dataType());
auto lteMaskFloat = *lteMask.cast(diff.dataType());
auto gtMaskFloat = gtMask.cast(diff.dataType());
auto lteMaskFloat = lteMask.cast(diff.dataType());
dLdp->assign( lteMaskFloat * diff + gtMaskFloat * delta * signDiff);
@ -207,7 +207,7 @@ DECLARE_SHAPE_FN(huber_loss) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -235,7 +235,7 @@ DECLARE_SHAPE_FN(huber_loss) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -264,7 +264,7 @@ DECLARE_SHAPE_FN(huber_loss) {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -181,7 +181,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
// dE_i/dp_i = (1-y_i)/(1-p_i+eps) - y_i/(p_i+eps)
dLdp->assign(oneMinusLabels / onePlusEpsMinusPredict - *labels / predictPlusEps); // dE/dp
// dE_i/dy_i = log((1+2eps)/(p_i+eps) - 1)
((1. + 2. * epsilon) / predictPlusEps - 1.).applyTransform(transform::Log, dLdl); // dE/dy
((1. + 2. * epsilon) / predictPlusEps - 1.).applyTransform(transform::Log, *dLdl); // dE/dy
NDArray E = -(*labels) * predictPlusEps.transform(transform::Log) - oneMinusLabels * onePlusEpsMinusPredict.transform(transform::Log);
@ -196,7 +196,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -226,7 +226,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -254,7 +254,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeights);
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -55,9 +55,9 @@ namespace ops {
NDArray E(labels->getShapeInfo(), block.getWorkspace());
if (computeFullLoss)
labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, log_predictions, &E, nullptr);
labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E);
else
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, *log_predictions, E);
// multiply E on weights
@ -176,13 +176,13 @@ namespace ops {
NDArray E(labels->getShapeInfo(), block.getWorkspace());
if (computeFullLoss) {
labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, log_predictions, &E, nullptr);
labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E);
NDArray rDiv(labels->getShapeInfo(), block.getWorkspace());
labels->applyScalar(scalar::ReverseDivide, 0.5f, &rDiv);
labels->applyScalar(scalar::ReverseDivide, 0.5f, rDiv);
dLdl->assign(rDiv + labels->transform(transform::Log) + -(*log_predictions));
} else {
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, log_predictions, &E, nullptr);
labels->applyPairwiseTransform(pairwise::LogPoissonLoss, *log_predictions, E);
dLdl->assign(-(*log_predictions));
}
@ -200,7 +200,7 @@ namespace ops {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -228,7 +228,7 @@ namespace ops {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -257,7 +257,7 @@ namespace ops {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -112,10 +112,10 @@ namespace nd4j {
auto n = double(labels->sizeAt(1));
auto diffs = *predictions - *labels;
auto sumOfSquares = (diffs * diffs).reduceAlongDims(reduce::Sum, reductionIdx, true);
auto sumOfSquares = (diffs * diffs).reduceAlongDimension(reduce::Sum, reductionIdx, true);
auto squareOfSum = diffs.reduceAlongDims(reduce::Sum, reductionIdx, true);
squareOfSum.applyScalar(scalar::Pow, 2);
auto squareOfSum = diffs.reduceAlongDimension(reduce::Sum, reductionIdx, true);
squareOfSum.applyScalar(scalar::Pow, 2, squareOfSum);
auto E = ((sumOfSquares * n) - squareOfSum) * (4/(n*(n-1)));
@ -240,15 +240,15 @@ namespace nd4j {
auto diffs = *predictions - *labels;
std::vector<int> reductionIdx = ShapeUtils::evalDimsToExclude(labels->rankOf(), {0});
auto sumOfSquares = (diffs * diffs).reduceAlongDims(reduce::Sum, reductionIdx, true);
auto sumOfSquares = (diffs * diffs).reduceAlongDimension(reduce::Sum, reductionIdx, true);
auto squareOfSum = diffs.reduceAlongDims(reduce::Sum, reductionIdx, true);
squareOfSum.applyScalar(scalar::Pow, 2);
auto squareOfSum = diffs.reduceAlongDimension(reduce::Sum, reductionIdx, true);
squareOfSum.applyScalar(scalar::Pow, 2, squareOfSum);
auto E = ((sumOfSquares * n) - squareOfSum) * (4/(n*(n-1)));
auto sumPred = predictions->reduceAlongDims(reduce::Sum, reductionIdx, true);
auto sumLabel = labels->reduceAlongDims(reduce::Sum, reductionIdx, true);
auto sumPred = predictions->reduceAlongDimension(reduce::Sum, reductionIdx, true);
auto sumLabel = labels->reduceAlongDimension(reduce::Sum, reductionIdx, true);
dLdp->assign(((diffs * n) - sumPred + sumLabel)*(8/(n*(n-1))));
@ -273,7 +273,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -299,7 +299,7 @@ namespace nd4j {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -327,7 +327,7 @@ namespace nd4j {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -51,7 +51,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss, 3, 1, false, 0, 1) {
weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo()));
NDArray E(labels->getShapeInfo(), false, block.launchContext());
predictions->applyPairwiseTransform(pairwise::SquaredSubtract, labels, &E, nullptr);
predictions->applyPairwiseTransform(pairwise::SquaredSubtract, *labels, E);
// multiply E on weights
E *= (*weightsBroad);
@ -191,7 +191,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -217,7 +217,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
@ -245,7 +245,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -56,7 +56,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss, 3, 1, false, 1, 1) {
auto newLabels = labels;
if(labelsSmoothing != 0.) {
newLabels = new NDArray(*labels);
newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing, newLabels, nullptr);
newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing, *newLabels);
}
NDArray E(labels, false, block.launchContext());
@ -186,7 +186,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
auto newLabels = labels;
if(labelsSmoothing.e<float>(0) != 0.f) {
newLabels = new NDArray(*labels);
newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing.e<float>(0), newLabels, nullptr);
newLabels->applyScalar(scalar::SXELogitsSmoother, labelsSmoothing.e<float>(0), *newLabels);
}
NDArray E(labels, false, block.launchContext());
@ -211,7 +211,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum));
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@ -239,7 +239,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
*dLdw = 0.;
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum * sum));
@ -267,7 +267,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeightsScalar);
else if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeightsScalar;
}
else

View File

@ -54,11 +54,11 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss, 3, 1, false, 1, 1) {
// If label_smoothing is nonzero, smooth the labels towards 1/num_classes: new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes
// num_classes = labels->sizeAt(1)
auto cLabels = labels->cast(weights->dataType());
auto newLabels = cLabels;
NDArray* cLabels = new NDArray(labels->cast(weights->dataType()));
NDArray* newLabels = cLabels;
if(labelsSmoothing != 0.) {
newLabels = new NDArray(cLabels);
*newLabels = (1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1);
newLabels->assign((1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1));
}
// main formula: result = - sum_i(labels_i * log(softmax_i)) - sum over last dimension
@ -70,9 +70,9 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss, 3, 1, false, 1, 1) {
std::vector<int> dimensions = {-1};
NDArray shiftedLogits = *logits - logits->reduceAlongDims(reduce::Max, dimensions, true);
NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDims(reduce::Sum, dimensions, true).transform(transform::Log);
NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDims(reduce::Sum, dimensions);
NDArray shiftedLogits = *logits - logits->reduceAlongDimension(reduce::Max, dimensions, true);
NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDimension(reduce::Sum, dimensions, true).transform(transform::Log);
NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDimension(reduce::Sum, dimensions);
// perform weights broadcasting/tile to E if it is necessary
auto weightsBroad = weights;
@ -217,25 +217,25 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
// If label_smoothing is nonzero, smooth the labels towards 1/num_classes: new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes
// num_classes = labels->sizeAt(1)
auto cLabels = labels->cast(weights->dataType());
auto newLabels = cLabels;
NDArray* cLabels = new NDArray(labels->cast(weights->dataType()));
NDArray* newLabels = cLabels;
if(labelsSmoothing != 0.) {
newLabels = new NDArray(labels->getShapeInfo(), dLdl->dataType(), false, block.launchContext());
newLabels->assign((1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1));
}
NDArray softmax = (*logits - logits->reduceAlongDims(reduce::Max, dimensions, true)).transform(transform::Exp);
softmax /= softmax.reduceAlongDims(reduce::Sum, dimensions, true);
NDArray softmax = (*logits - logits->reduceAlongDimension(reduce::Max, dimensions, true)).transform(transform::Exp);
softmax /= softmax.reduceAlongDimension(reduce::Sum, dimensions, true);
// dEdp = softmax * sum_i(labels_i) - labels
dLdp->assign(softmax * newLabels->reduceAlongDims(reduce::Sum, dimensions, true) - *newLabels);
dLdp->assign(softmax * newLabels->reduceAlongDimension(reduce::Sum, dimensions, true) - *newLabels);
// dEdl = -log(softmax)
dLdl->assign(-softmax.transform(transform::Log)* (1.f - labelsSmoothing));
NDArray shiftedLogits = *logits - logits->reduceAlongDims(reduce::Max, dimensions, true);
NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDims(reduce::Sum, dimensions, true).transform(transform::Log);
NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDims(reduce::Sum, dimensions);
NDArray shiftedLogits = *logits - logits->reduceAlongDimension(reduce::Max, dimensions, true);
NDArray logSumExp = shiftedLogits.transform(transform::Exp).reduceAlongDimension(reduce::Sum, dimensions, true).transform(transform::Log);
NDArray E = (*newLabels * (logSumExp - shiftedLogits)).reduceAlongDimension(reduce::Sum, dimensions);
// perform weights broadcasting/tile to E if it is necessary
auto weightsBroad = weights;
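As a sanity check on the dLdp and dLdl assignments above: for L = -\sum_i y_i \log p_i with p = \mathrm{softmax}(z), the standard derivatives are

\frac{\partial L}{\partial z_j} = p_j \sum_i y_i - y_j, \qquad \frac{\partial L}{\partial \ell_j} = -(1 - s)\,\log p_j,

where \ell are the original labels, y = (1 - s)\,\ell + s/C are the smoothed labels, s is label_smoothing and C the number of classes. The first expression is exactly softmax * sum_i(labels_i) - labels, and the factor (1 - s) in the second is the (1.f - labelsSmoothing) multiplier applied to dLdl.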
@@ -253,12 +253,12 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
*dLdl *= *weights;
}
else {
dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, weightsBroad);
dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, weightsBroad);
dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, *weightsBroad, *dLdp);
dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, *weightsBroad, *dLdl);
if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign(E);
@@ -289,12 +289,12 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
else {
NDArray temp = *weightsBroad / sum;
dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdp);
dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdl);
if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
}
else
dLdw->assign((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum));
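The dLdw expression above is the quotient rule applied to the weighted mean of the per-sample losses: with \mathrm{sum} = \sum_i w_i,

\frac{\partial}{\partial w_k}\left(\frac{\sum_i w_i E_i}{\sum_i w_i}\right) = \frac{E_k \,\mathrm{sum} - \sum_i w_i E_i}{\mathrm{sum}^2},

which is then reduced over axesToReduceAlong whenever the user-supplied weights had a smaller (broadcast) shape than E.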
@@ -326,12 +326,12 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) {
}
else {
NDArray temp = *weightsBroad / numOfNonZeroWeights;
dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, &temp);
dLdp->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdp);
dLdl->applyBroadcast(nd4j::broadcast::Multiply, dimensions, temp, *dLdl);
if(weights != weightsBroad) {
std::vector<int> axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo());
E.reduceAlongDimension(reduce::Sum, dLdw, axesToReduceAlong, true, false, false);
E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false);
*dLdw /= numOfNonZeroWeights;
}
else


@@ -41,11 +41,11 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_with_logits, 2, 1, false, 0, 0) {
std::vector<int> dimension = {classesDim};
auto maxAlongDim = logits->reduceAlongDims(reduce::Max, {classesDim}, true);
auto maxAlongDim = logits->reduceAlongDimension(reduce::Max, {classesDim}, true);
auto logExp = (*logits - maxAlongDim).transform(transform::Exp);
auto logSoftMax = ( logExp / logExp.reduceAlongDims(reduce::Sum, {classesDim}, true) ).transform(transform::Log);
auto logSoftMax = ( logExp / logExp.reduceAlongDimension(reduce::Sum, {classesDim}, true) ).transform(transform::Log);
(-(*labels) * logSoftMax).reduceAlongDimension(reduce::Sum, output, dimension);
(-(*labels) * logSoftMax).reduceAlongDimension(reduce::Sum, *output, dimension);
return Status::OK();
}
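The max subtraction in these ops relies on the shift invariance of log-softmax; for any m (here m = \max_i z_i, which keeps every exponent non-positive and avoids overflow):

\log \mathrm{softmax}(z)_j = (z_j - m) - \log \sum_i e^{\,z_i - m}.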
@@ -97,14 +97,14 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_with_logits_grad, 2, 2, false, 0, 0) {
std::vector<int> dimension = {classesDim};
NDArray softmax = (*logits - logits->reduceAlongDims(reduce::Max, dimension, true)).transform(transform::Exp);
softmax /= softmax.reduceAlongDims(reduce::Sum, dimension, true);
NDArray softmax = (*logits - logits->reduceAlongDimension(reduce::Max, dimension, true)).transform(transform::Exp);
softmax /= softmax.reduceAlongDimension(reduce::Sum, dimension, true);
// dEdp = softmax * sum_i(labels_i) - labels
dLdp->assign(softmax * labels->reduceAlongDims(reduce::Sum, dimension, true) - *labels);
dLdp->assign(softmax * labels->reduceAlongDimension(reduce::Sum, dimension, true) - *labels);
// dEdl = -log(softmax)
(-softmax).applyTransform(transform::Log, dLdl);
(-softmax).applyTransform(transform::Log, *dLdl);
return Status::OK();
}


@@ -50,9 +50,9 @@ CUSTOM_OP_IMPL(sparse_softmax_cross_entropy_loss_with_logits, 2, 1, false, 0, 0)
std::vector<int> dimension = {-1};
auto maxAlongDim = logits->reduceAlongDims(reduce::Max, dimension, true);
auto maxAlongDim = logits->reduceAlongDimension(reduce::Max, dimension, true);
auto logitsExp = (*logits - maxAlongDim).transform(transform::Exp, nullptr);
auto logSoftMax = -(( logitsExp / logitsExp.reduceAlongDims(reduce::Sum, dimension, true) ).transform(transform::Log));
auto logSoftMax = -(( logitsExp / logitsExp.reduceAlongDimension(reduce::Sum, dimension, true) ).transform(transform::Log));
helpers::scatterForLoss(block.launchContext(), *labels, logSoftMax, *output, false);
@@ -117,8 +117,8 @@ CUSTOM_OP_IMPL(sparse_softmax_cross_entropy_loss_with_logits_grad, 2, 1, false,
std::vector<int> dimension = {-1};
NDArray softmax = (*logits - logits->reduceAlongDims(reduce::Max, dimension, true)).transform(transform::Exp);
softmax /= softmax.reduceAlongDims(reduce::Sum, dimension, true);
NDArray softmax = (*logits - logits->reduceAlongDimension(reduce::Max, dimension, true)).transform(transform::Exp);
softmax /= softmax.reduceAlongDimension(reduce::Sum, dimension, true);
// dEdp = softmax - 1 (or 0)
dLdp->assign(softmax);
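For the sparse variant, where the label is a class index y rather than a one-hot vector, the same derivative collapses to

\frac{\partial L}{\partial z_j} = p_j - \mathbb{1}[j = y],

i.e. "softmax - 1 (or 0)" as the comment says; the subtraction of 1 at the labelled position is handled elsewhere in the op and is not shown in this hunk.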


@@ -229,19 +229,19 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
// input - mean
NDArray xMinusMean(input); // empty array with same shape as input
input->applyBroadcast(nd4j::broadcast::Subtract, axes, mean, &xMinusMean);
input->applyBroadcast(nd4j::broadcast::Subtract, axes, *mean, xMinusMean);
// stdInv
NDArray stdInv = *variance + epsilon;
stdInv.applyTransform(transform::Reciprocal); // 1 / (variance + epsilon)
stdInv.applyTransform(transform::Sqrt); // 1 / (variance + epsilon)^0.5
stdInv.applyTransform(transform::Reciprocal, stdInv); // 1 / (variance + epsilon)
stdInv.applyTransform(transform::Sqrt, stdInv); // 1 / (variance + epsilon)^0.5
// dvdm (use dLdM as storage for dvdm)
xMinusMean.reduceAlongDimension(nd4j::reduce::Sum, dLdM, excludedAxes, keepUnitiesInShape);
xMinusMean.reduceAlongDimension(nd4j::reduce::Sum, *dLdM, excludedAxes, keepUnitiesInShape);
*dLdM *= -Ninv;
// g_sum
auto gSum = dLdO->reduceAlongDims(nd4j::reduce::Sum, excludedAxes, keepUnitiesInShape);
auto gSum = dLdO->reduceAlongDimension(nd4j::reduce::Sum, excludedAxes, keepUnitiesInShape);
// dLdB
if(applyOffset)
@@ -249,11 +249,11 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
// stdInv * (g - g_sum/N) (use dLdI as storage for this expression)
gSum *= Ninv;
dLdO->applyBroadcast(nd4j::broadcast::Subtract, axes, &gSum, dLdI);
dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, &stdInv);
dLdO->applyBroadcast(nd4j::broadcast::Subtract, axes, gSum, *dLdI);
dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, stdInv, *dLdI);
// dLdV <- [g*(x - m)]_sum
(xMinusMean * *dLdO).reduceAlongDimension(nd4j::reduce::Sum, dLdV, excludedAxes, keepUnitiesInShape);
(xMinusMean * *dLdO).reduceAlongDimension(nd4j::reduce::Sum, *dLdV, excludedAxes, keepUnitiesInShape);
// dLdG
*dLdV *= stdInv;
@@ -265,13 +265,13 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
*dLdV *= -Ninv; // -0.5f * (2 / N);
// dfdv * (dvdm + (x - m)) (use xMinusMean as storage for this expression)
xMinusMean.applyBroadcast(nd4j::broadcast::Add, axes, dLdM);
xMinusMean.applyBroadcast(nd4j::broadcast::Multiply, axes, dLdV);
xMinusMean.applyBroadcast(nd4j::broadcast::Add, axes, *dLdM, xMinusMean);
xMinusMean.applyBroadcast(nd4j::broadcast::Multiply, axes, *dLdV, xMinusMean);
// dLdI
*dLdI += xMinusMean;
if(applyScale)
dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, gamma);
dLdI->applyBroadcast(nd4j::broadcast::Multiply, axes, *gamma, *dLdI);
*dLdM = 0; // put zeros so far
*dLdV = 0; // put zeros so far
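For reference, the sequence above (stdInv, dvdm, gSum, dLdV, dLdI) is an expanded form of the usual batch-norm backward pass; per reduced group of size N, with \hat{x}_i = (x_i - \mu)/\sqrt{\sigma^2 + \varepsilon} and upstream gradient g, it should agree with the standard closed form

\frac{\partial L}{\partial \beta} = \sum_i g_i, \qquad \frac{\partial L}{\partial \gamma} = \sum_i g_i \hat{x}_i, \qquad \frac{\partial L}{\partial x_i} = \frac{\gamma}{\sqrt{\sigma^2 + \varepsilon}}\left(g_i - \frac{1}{N}\sum_j g_j - \frac{\hat{x}_i}{N}\sum_j g_j \hat{x}_j\right).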


@@ -240,7 +240,7 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {
if(gradB) {
if(gradB->rankOf() == 2)
gradB = new NDArray(gradB->reshape(gradB->ordering(), {(int)gradB->lengthOf()}));
gradO->reduceAlongDimension(reduce::Sum, gradB, gradOaxesForDot); // sum over bS oD oH oW
gradO->reduceAlongDimension(reduce::Sum, *gradB, gradOaxesForDot); // sum over bS oD oH oW
if(gradB != OUTPUT_VARIABLE(2))
delete gradB;
}
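Since the bias is simply broadcast-added to the convolution output, its gradient is the sum of gradO over every non-channel axis, which is what the reduceAlongDimension(reduce::Sum, ...) call computes:

\frac{\partial L}{\partial b_c} = \sum_{n,\,d,\,h,\,w} \frac{\partial L}{\partial O_{n,c,d,h,w}}.

The same pattern appears in the deconv2d_bp, deconv3d_bp and biasadd_bp hunks below, only with a different set of axes summed over.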


@@ -234,7 +234,7 @@ CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {
if(gradB) {
if(gradB->rankOf() == 2)
gradB = new NDArray(gradB->reshape(gradB->ordering(), {gradB->lengthOf()}));
gradO->reduceAlongDimension(reduce::Sum, gradB, {0, 2, 3}); // sum over bS, oH, oW
gradO->reduceAlongDimension(reduce::Sum, *gradB, {0, 2, 3}); // sum over bS, oH, oW
if(gradB != OUTPUT_VARIABLE(2))
delete gradB;
}


@@ -244,7 +244,7 @@ CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {
if(gradB) {
if(gradB->rankOf() == 2)
gradB = new NDArray(gradB->reshape(gradB->ordering(), {(int)gradB->lengthOf()}));
gradO->reduceAlongDimension(reduce::Sum, gradB, {0, 2, 3, 4}); // sum over bS, oD, oH, oW
gradO->reduceAlongDimension(reduce::Sum, *gradB, {0, 2, 3, 4}); // sum over bS, oD, oH, oW
if(gradB != OUTPUT_VARIABLE(2))
delete gradB;
}


@@ -84,7 +84,7 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
epsilon = 0.001;
const int restSize = x->lengthOf() / iD;
auto xAffected = NDArrayFactory::create(x->ordering(), {restSize, iD}, x->dataType(), block.launchContext());
auto xAffected = NDArrayFactory::create(x->ordering(), {restSize, iD}, mean->dataType(), block.launchContext());
xAffected.assign(x);
const int restSizeMinusOne = (restSize > 1) ? (restSize - 1) : 1;
@@ -93,7 +93,7 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
const double restSizeAdjust = (double)restSize / restSizeMinusOne;
if(isTraining) {
auto sum = xAffected.reduceAlongDims(reduce::Sum, {0});
auto sum = xAffected.reduceAlongDimension(reduce::Sum, {0});
sum *= restSizeInv;
mean->assign(sum);
*batchMean = *mean;
@@ -106,8 +106,8 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
if(isTraining) {
int power = 2;
xAffected.applyScalar(scalar::Pow, power);
auto sum = xAffected.reduceAlongDims(reduce::Sum, {0});
xAffected.applyScalar(scalar::Pow, power, xAffected);
auto sum = xAffected.reduceAlongDimension(reduce::Sum, {0});
sum *= restSizeInv;
variance->assign(sum);
*batchVar = (*variance) * restSizeAdjust;
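fused_batch_norm computes the variance as E[x^2] - (E[x])^2 and then rescales the returned batch variance by restSize/(restSize - 1). A standalone sketch of the same arithmetic over a single channel, assuming plain std::vector storage (channelMoments is an illustrative name, not a library function):

#include <vector>

// Mean and variance the way the hunk above computes them: variance = E[x^2] - E[x]^2,
// with the batch variance rescaled by N / (N - 1) (the restSizeAdjust factor).
void channelMoments(const std::vector<float>& x, float& mean, float& variance, float& batchVar) {
    const float n = static_cast<float>(x.size());
    float sum = 0.f, sumSq = 0.f;
    for (float v : x) {
        sum   += v;
        sumSq += v * v;
    }
    mean     = sum / n;
    variance = sumSq / n - mean * mean;                     // biased (population) variance
    const float adjust = (x.size() > 1) ? n / (n - 1.f) : 1.f;
    batchVar = variance * adjust;                           // adjusted value returned as batch_var
}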


@@ -68,7 +68,7 @@ CONFIGURABLE_OP_IMPL(log_softmax_bp, 2, 1, true, 0, 0) {
helpers::softmax(block.launchContext(), *input, *gradI, dim);
gradI->assign( *gradO - (*gradI * *gradO).reduceAlongDims(reduce::Sum, {dim}, true) );
gradI->assign( *gradO - (*gradI * *gradO).reduceAlongDimension(reduce::Sum, {dim}, true) );
return Status::OK();
}


@@ -46,7 +46,7 @@ namespace nd4j {
auto scalar = block.numT() > 0 ? block.getTArguments()->at(0) : 0.0;
auto xw = result->at(0);
xw->applyScalar(nd4j::scalar::RELU, scalar, output);
xw->applyScalar(nd4j::scalar::RELU, scalar, *output);
return Status::OK();
}


@@ -62,7 +62,7 @@ CONFIGURABLE_OP_IMPL(softmax_bp, 2, 1, true, 0, 0) {
helpers::softmax(block.launchContext(), *input, *gradI, dim);
auto sumAlongDim = (*gradI * *gradO).reduceAlongDims(reduce::Sum, {dim}, true);
auto sumAlongDim = (*gradI * *gradO).reduceAlongDimension(reduce::Sum, {dim}, true);
gradI->assign(*gradI * (*gradO - sumAlongDim));
return Status::OK();
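The two lines above implement the softmax Jacobian-vector product: with p = \mathrm{softmax}(z) (written into gradI by the helper) and upstream gradient g,

\frac{\partial L}{\partial z_k} = p_k\left(g_k - \sum_j p_j g_j\right),

where \sum_j p_j g_j is the sumAlongDim term.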


@@ -56,7 +56,7 @@ CONFIGURABLE_OP_IMPL(adjust_contrast, 1, 1, true, 0, 0) {
axes[i] = i;
// mean as reduction for last dimension set
auto mean = input->reduceAlongDims(reduce::Mean, axes);
auto mean = input->reduceAlongDimension(reduce::Mean, axes);
// this is contrast calculation
output->assign((*input - mean) * (*factor) + mean);
@@ -104,13 +104,13 @@ CONFIGURABLE_OP_IMPL(adjust_contrast_v2, 1, 1, true, 0, 0) {
std::vector<int> axes({1}); // dim 1 of pseudoresult
// mean as reduction for last dimension set over size (dim 1) of result3D
auto mean = input3D.reduceAlongDims(reduce::Mean, axes);
auto mean = input3D.reduceAlongDimension(reduce::Mean, axes);
// result as (x - mean) * factor + mean
auto temp = input3D.ulike();
input3D.applyBroadcast(broadcast::Subtract, {0, 2}, &mean, &temp, nullptr);
temp.applyScalarArr(scalar::Multiply, factor);
temp.applyBroadcast(broadcast::Add, {0, 2}, &mean, &output3D);
input3D.applyBroadcast(broadcast::Subtract, {0, 2}, mean, temp);
temp.applyScalarArr(scalar::Multiply, *factor, temp);
temp.applyBroadcast(broadcast::Add, {0, 2}, mean, output3D);
output->assign(output3D);
if(block.width() == 1)
delete factor;


@@ -44,11 +44,11 @@ namespace nd4j {
auto axisVector = INPUT_VARIABLE(1);
helpers::adjustAxis(input->rankOf(), axisVector, axis);
input->applyIndexReduce(indexreduce::IndexMax, output, axis);
input->applyIndexReduce(indexreduce::IndexMax, *output, axis);
} else {
helpers::adjustAxis(input->rankOf(), axis);
input->applyIndexReduce(indexreduce::IndexMax, output, axis);
input->applyIndexReduce(indexreduce::IndexMax, *output, axis);
}
STORE_RESULT(output);


@@ -44,11 +44,11 @@ namespace nd4j {
auto axisVector = INPUT_VARIABLE(1);
helpers::adjustAxis(input->rankOf(), axisVector, axis);
input->applyIndexReduce(indexreduce::IndexMin, output, axis);
input->applyIndexReduce(indexreduce::IndexMin, *output, axis);
} else {
helpers::adjustAxis(input->rankOf(), axis);
input->applyIndexReduce(indexreduce::IndexMin, output, axis);
input->applyIndexReduce(indexreduce::IndexMin, *output, axis);
}
STORE_RESULT(output);


@@ -82,7 +82,7 @@ CUSTOM_OP_IMPL(biasadd_bp, 3, 2, false, 0, 0) {
gradI->assign(gradO);
gradO->reduceAlongDimension(nd4j::reduce::Sum, gradB, ShapeUtils::evalDimsToExclude(gradO->rankOf(), {channelDim}));
gradO->reduceAlongDimension(nd4j::reduce::Sum, *gradB, ShapeUtils::evalDimsToExclude(gradO->rankOf(), {channelDim}));
return ND4J_STATUS_OK;
}


@@ -45,7 +45,7 @@ CUSTOM_OP_IMPL(embedding_lookup, 2, 1, false, 0, 1) {
v = i++;
}
std::unique_ptr<ResultSet> outputView(output->allTensorsAlongDimension(dims));
ResultSet outputView = output->allTensorsAlongDimension(dims);
REQUIRE_TRUE(block.width() > output->sizeAt(0), 0, "embedding_lookup: input list should be greater then %i, but %i given.",
output->sizeAt(0), block.width()
);
@@ -53,7 +53,7 @@ CUSTOM_OP_IMPL(embedding_lookup, 2, 1, false, 0, 1) {
Nd4jLong thisIndex = (*indeces).e<Nd4jLong>(e);
input = INPUT_VARIABLE(thisIndex); // lookup param
outputView->at(e)->assign(input);
outputView.at(e)->assign(input);
}
}
else {


@@ -49,8 +49,8 @@ namespace nd4j {
}
std::vector<int>& dims = axis;
input->varianceAlongDimension(variance::SummaryStatsVariance, variances, false, axis);
input->reduceAlongDimension(reduce::Mean, means, axis, keepDims);
input->varianceAlongDimension(variance::SummaryStatsVariance, *variances, false, axis);
input->reduceAlongDimension(reduce::Mean, *means, axis, keepDims);
return Status::OK();
}


@@ -52,31 +52,31 @@ namespace nd4j {
case 0: {
REQUIRE_TRUE(dims.size() == 2 || (input->rankOf() == 2 && dims.size() == 0), 0, "Norm: Frobenius is defined for 2D matrices or TADS only");
// fro
input->reduceAlongDimension(reduce::NormFrobenius, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::NormFrobenius, *output, dims, false, output->rankOf() == 2);
}
break;
case 1: {
// euclidean
if ((input->rankOf() == 2 && dims.size() == 0) || dims.size() == 2) {
input->reduceAlongDimension(reduce::NormFrobenius, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::NormFrobenius, *output, dims, false, output->rankOf() == 2);
} else {
input->reduceAlongDimension(reduce::Norm2, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::Norm2, *output, dims, false, output->rankOf() == 2);
}
}
break;
case 2: {
// 1
input->reduceAlongDimension(reduce::Norm1, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::Norm1, *output, dims, false, output->rankOf() == 2);
}
break;
case 3: {
// 2
input->reduceAlongDimension(reduce::Norm2, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::Norm2, *output, dims, false, output->rankOf() == 2);
}
break;
case 4: {
// inf-norm
input->reduceAlongDimension(reduce::NormMax, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::NormMax, *output, dims, false, output->rankOf() == 2);
}
break;
default: {
@@ -84,7 +84,7 @@ namespace nd4j {
REQUIRE_TRUE(block.getIArguments()->size() > 1, 0, "P-Norm reductions requires 2 TArguments, but only 1 was provided");
// FIXME: p is required here
//T p = T_ARG(1);
input->reduceAlongDimension(reduce::NormP, output, dims, false, output->rankOf() == 2);
input->reduceAlongDimension(reduce::NormP, *output, dims, false, output->rankOf() == 2);
}
}
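The mode switch above selects one of the standard norms; for completeness:

\|X\|_F = \sqrt{\sum_{i,j} x_{ij}^2} \;(\text{Frobenius, modes 0/1 on matrices}), \quad \|x\|_1 = \sum_i |x_i|, \quad \|x\|_2 = \sqrt{\sum_i x_i^2}, \quad \|x\|_\infty = \max_i |x_i| \;(\text{NormMax}), \quad \|x\|_p = \Big(\sum_i |x_i|^p\Big)^{1/p}.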


@@ -40,23 +40,20 @@ namespace nd4j {
shift.assign(T_ARG(0));
}
means->applyScalarArr(scalar::Divide, counts, resMeans, nullptr);
means->applyScalarArr(scalar::Divide, *counts, *resMeans);
NDArray* squareMeans = resMeans->dup('c');
NDArray* tempVariances = resVariances->dup('c');
NDArray squareMeans = resMeans->dup('c');
NDArray tempVariances = resVariances->dup('c');
squareMeans->applyTransform(transform::Square, squareMeans, nullptr);
variances->applyScalarArr(scalar::Divide, counts, tempVariances, nullptr);
// tempVariances->printIndexedBuffer("varianced divided by count");
tempVariances->applyPairwiseTransform(pairwise::Subtract, squareMeans, resVariances, nullptr);
squareMeans.applyTransform(transform::Square, squareMeans, nullptr);
variances->applyScalarArr(scalar::Divide, *counts, tempVariances);
// tempVariances.printIndexedBuffer("varianced divided by count");
tempVariances.applyPairwiseTransform(pairwise::Subtract, squareMeans, *resVariances);
if (shift.e<double>(0) != 0) {
resMeans->applyScalarArr(scalar::Add, &shift, resMeans, nullptr);
resMeans->applyScalarArr(scalar::Add, shift, *resMeans);
}
delete squareMeans;
delete tempVariances;
return Status::OK();
}
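Assuming the inputs of this op carry accumulated sums (means = \sum x, variances = \sum x^2, counts = n, with the sums taken over x - \text{shift} when a shift is supplied), the assignments above are just

\mu = \frac{\sum x}{n}, \qquad \sigma^2 = \frac{\sum x^2}{n} - \mu^2,

with the shift added back onto the returned mean at the end; the variance itself is shift-invariant.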


@@ -47,7 +47,7 @@ CUSTOM_OP_IMPL(reduce_mean, 1, 1, false, 0, 0) {
for(const auto& item : dimensions)
REQUIRE_TRUE(item >= -input->rankOf() && item < input->rankOf(), 0, "REDUCE_MEAN OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , input->rankOf(), input->rankOf(), item);
input->reduceAlongDimension(reduce::Mean, output, dimensions, keepDims);
input->reduceAlongDimension(reduce::Mean, *output, dimensions, keepDims);
return Status::OK();
}


@@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(reduce_stdev, 1, 1, false, 0, 0) {
for(const auto& item : dimensions)
REQUIRE_TRUE(item >= -input->rankOf() && item < input->rankOf(), 0, "REDUCE_STDEV OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , input->rankOf(), input->rankOf(), item);
input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, output, biasCorrected, dimensions);
input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, *output, biasCorrected, dimensions);
return Status::OK();
}
@@ -130,10 +130,10 @@ CUSTOM_OP_IMPL(reduce_stdev_bp, 2, 1, false, 0, 0) {
const Nd4jLong N = input->lengthOf() / gradO->lengthOf();
const Nd4jLong NminusOne = biasCorrected ? N - 1 : N;
auto mean = input->reduceAlongDims(reduce::Mean, dimensions, true);
auto mean = input->reduceAlongDimension(reduce::Mean, dimensions, true);
NDArray variance(mean.getShapeInfo(), true, block.launchContext()); // create empty array with shape matching shape of mean array
input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, &variance, biasCorrected, dimensions);
input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, variance, biasCorrected, dimensions);
gradI->assign( (*input - mean) / (variance * NminusOne)); // automatic broadcasting happens here
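The gradI assignment matches the derivative of the (bias-corrected) standard deviation, keeping in mind that the local array named variance is filled with SummaryStatsStandardDeviation, i.e. with s itself: for s = \sqrt{\tfrac{1}{N-1}\sum_i (x_i - \mu)^2},

\frac{\partial s}{\partial x_i} = \frac{x_i - \mu}{(N - 1)\, s},

with N - 1 replaced by N when biasCorrected is false (the NminusOne constant above covers both cases).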


@@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(reduce_variance, 1, 1, false, 0, 0) {
for(const auto& item : dimensions)
REQUIRE_TRUE(item >= -input->rankOf() && item < input->rankOf(), 0, "REDUCE_VARIANCE OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , input->rankOf(), input->rankOf(), item);
input->varianceAlongDimension(variance::SummaryStatsVariance, output, biasCorrected, dimensions);
input->varianceAlongDimension(variance::SummaryStatsVariance, *output, biasCorrected, dimensions);
return Status::OK();
}
@@ -129,7 +129,7 @@ CUSTOM_OP_IMPL(reduce_variance_bp, 2, 1, false, 0, 0) {
const double factor1 = 2.0 / NminusOne;
const double factor2 = 2.0 / (N * NminusOne);
auto mean = input->reduceAlongDims(reduce::Mean, dimensions, true);
auto mean = input->reduceAlongDimension(reduce::Mean, dimensions, true);
gradI->assign( (*input - mean) * (2.0f / NminusOne)); // automatic broadcasting happens here
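Likewise for the variance op: with v = \tfrac{1}{N-1}\sum_i (x_i - \mu)^2, the terms coming from \mu's own dependence on x_i cancel (because \sum_j (x_j - \mu) = 0), leaving

\frac{\partial v}{\partial x_i} = \frac{2\,(x_i - \mu)}{N - 1},

which is the (2.0f / NminusOne) factor above (again, N - 1 becomes N when biasCorrected is false).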


@@ -45,9 +45,9 @@ namespace ops {
//void* whereMax = (void*)();
auto internal = (*input);
internal -= maxVals;
internal.applyTransform(transform::Exp, nullptr, nullptr);
internal.reduceAlongDimension(reduce::Sum, output, axes, keepDims, false); //, (void*)&maxVals);
output->applyTransform(transform::Log, nullptr, nullptr);
internal.applyTransform(transform::Exp, internal);
internal.reduceAlongDimension(reduce::Sum, *output, axes, keepDims, false); //, (void*)&maxVals);
output->applyTransform(transform::Log, *output);
(*output) += maxVals;
return ND4J_STATUS_OK;
}


@@ -52,7 +52,7 @@ CUSTOM_OP_IMPL(reduce_max, 1, 1, false, 0, 0) {
else if (block.getTArguments()->size() > 0)
keepDims = (bool)T_ARG(0);
input->reduceAlongDimension(reduce::Max, output, dimensions, keepDims);
input->reduceAlongDimension(reduce::Max, *output, dimensions, keepDims);
return Status::OK();
}
@@ -122,8 +122,7 @@ CUSTOM_OP_IMPL(reduce_max_bp, 2, 1, false, 0, 0) {
else {
auto indicesArr = input->applyIndexReduce(nd4j::indexreduce::IndexMax, dimensions);
helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, *indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
delete indicesArr;
helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
}
return Status::OK();


@@ -52,7 +52,7 @@ CUSTOM_OP_IMPL(reduce_min, 1, 1, false, 0, 0) {
else if (block.getTArguments()->size() > 0)
keepDims = (bool)T_ARG(0);
input->reduceAlongDimension(reduce::Min, output, dimensions, keepDims);
input->reduceAlongDimension(reduce::Min, *output, dimensions, keepDims);
return Status::OK();
}
@@ -125,8 +125,7 @@ CUSTOM_OP_IMPL(reduce_min_bp, 2, 1, false, 0, 0) {
else {
auto indicesArr = input->applyIndexReduce(nd4j::indexreduce::IndexMin, dimensions);
helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, *indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
delete indicesArr;
helpers::scatterSimple(block.launchContext(), 6, *gradI, *gradO, indicesArr, ShapeUtils::evalDimsToExclude(gradI->rankOf(), dimensions)); // 6 corresponds to copy operation
}
return Status::OK();
