From 753ce28a924053228d0d5a1df33c24e4ff22d459 Mon Sep 17 00:00:00 2001 From: Yurii Shyrma Date: Thu, 14 May 2020 18:06:13 +0300 Subject: [PATCH] Shyrma sqrtm (#429) * - start working on implementation of sqrtm op Signed-off-by: Yurii * - improving householder procedure Signed-off-by: Yurii * - further polishing householder stuff Signed-off-by: Yurii * - polishing hh pivoting qr procedure Signed-off-by: Yurii * - polishing BiDiagonalUp procedure Signed-off-by: Yurii * - polishing householder sequence class Signed-off-by: Yurii * - polishing jacobi svd class Signed-off-by: Yurii * - polishing svd stuff 1 Signed-off-by: Yurii * - polishing svd stuff 2 Signed-off-by: Yurii * - implementation and testing class which performs Hessenberg decomposition of square matrix Signed-off-by: Yurii * - add static method to JacobiSVD class which makes the continuous Givens rotation generation algorithm Signed-off-by: Yurii * - implementation and testing auxiliary methods of Schur decomp class Signed-off-by: Yurii * some references here and there Signed-off-by: raver119 * - trying figure out difference between eigen and our Schur alg Signed-off-by: Yurii * - testing fixing bugs in Schur decomposition op Signed-off-by: Yurii * - start to implement class which performs calculation of eigen values and vectors Signed-off-by: Yurii * - add to EigenValsAndVecs method which calculates complex eigen vectors Signed-off-by: Yurii * - testing and fixing bugs in EigenValsAndVecs class Signed-off-by: Yurii * - implementation and testing triangularSolver class Signed-off-by: Yurii * Added a 2D routine for triangular systems solve. Signed-off-by: shugeo * Refactored triangularSolve2D routine and tests. Signed-off-by: shugeo * Refactored another test for triangularSolve2D. Signed-off-by: shugeo * Refactored test for triangularSolve for vector-bar case. Signed-off-by: shugeo * Refactored triangularSolve2D routine and tests. 
Signed-off-by: shugeo * - implementation of FullPivLU class Signed-off-by: Yurii * - fix bugs in FullPivLU::solve method Signed-off-by: Yurii * - correct permutation vector in FullPivLU::solve Signed-off-by: Yurii * - correct include headers Signed-off-by: Yurii * - implementation of Sqrtm class Signed-off-by: Yurii * - testing and fixing bugs in Sqrtm class Signed-off-by: Yurii * - include sqrtm classes to cuda folder, investigate in what places synchronization doesn't work Signed-off-by: Yurii * Added implementation for cuda triangularSolve2D and also refactored triangularSolve2D for cpu. Signed-off-by: shugeo * Eliminated waste implementations. Signed-off-by: shugeo * - make offset calculation faster in t<> methods Signed-off-by: Yurii * - rename reference T& NDArray::t<> method Signed-off-by: Yurii * - further work on cuda sqrtm Signed-off-by: Yurii * - provide correct synchronization to device in Sqrtm class Signed-off-by: Yurii * - add tests for sqrtm op Signed-off-by: Yurii * - correct fails which appeared while testing on jenkins Signed-off-by: Yurii * - trying to find out mistake in svd::deflation method Signed-off-by: Yurii * Revert "- trying to find out mistake in svd::deflation method" This reverts commit 19d37baddbc509028e4bc67bc932fe7449becdb6. * Revert "- trying to find out mistake in svd::deflation method" This reverts commit 19d37baddbc509028e4bc67bc932fe7449becdb6. 
Signed-off-by: Yurii * - change call semantic of r<> and t<> methods Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build 2 Signed-off-by: Yurii * - get rid of ambiguity in * operator overloads for windows build 3 Signed-off-by: Yurii * - resolve conflicts with master Signed-off-by: Yurii * cmakelists updated Signed-off-by: raver119@gmail.com * - minor fix in merge cpu helper - make use of reference getter Signed-off-by: Yurii Co-authored-by: raver119 Co-authored-by: shugeo --- libnd4j/include/array/NDArray.h | 110 +-- libnd4j/include/array/NDArray.hXX | 45 +- libnd4j/include/array/cpu/NDArray.cpp | 31 +- libnd4j/include/array/cuda/NDArray.cu | 19 +- libnd4j/include/helpers/EigenValsAndVecs.h | 86 ++ libnd4j/include/helpers/FullPivLU.h | 52 + libnd4j/include/helpers/HessenbergAndSchur.h | 102 ++ libnd4j/include/helpers/Sqrtm.h | 45 + libnd4j/include/helpers/biDiagonalUp.h | 13 +- libnd4j/include/helpers/cpu/biDiagonalUp.cpp | 180 ---- libnd4j/include/helpers/cpu/hhColPivQR.cpp | 171 ---- libnd4j/include/helpers/cpu/householder.cpp | 221 ----- libnd4j/include/helpers/cpu/svd.cpp | 485 ++++------ libnd4j/include/helpers/hhSequence.h | 28 +- libnd4j/include/helpers/householder.h | 46 +- .../include/helpers/impl/EigenValsAndVecs.cpp | 293 ++++++ libnd4j/include/helpers/impl/FullPivLU.cpp | 170 ++++ .../helpers/impl/HessenbergAndSchur.cpp | 383 ++++++++ libnd4j/include/helpers/impl/MmulHelper.cpp | 2 +- libnd4j/include/helpers/impl/Sqrtm.cpp | 276 ++++++ libnd4j/include/helpers/impl/biDiagonalUp.cpp | 160 +++ libnd4j/include/helpers/impl/hhColPivQR.cpp | 147 +++ .../helpers/{cpu => impl}/hhSequence.cpp | 79 +- libnd4j/include/helpers/impl/householder.cpp | 218 +++++ .../helpers/{cpu => impl}/jacobiSVD.cpp | 235 ++--- libnd4j/include/helpers/jacobiSVD.h | 7 +- libnd4j/include/helpers/shape.h | 2 +- .../loops/cuda/specials/swapUnsafeKernel.cu | 28 +- 
.../ops/declarable/generic/linalg/sqrtm.cpp | 53 + .../generic/{blas => linalg}/svd.cpp | 0 .../generic/linalg/triangular_solve.cpp | 4 +- libnd4j/include/ops/declarable/headers/blas.h | 30 +- .../ops/declarable/helpers/cpu/betaInc.cpp | 2 +- .../helpers/cpu/extract_patches.cpp | 2 +- .../helpers/cpu/fake_quantization.cpp | 2 +- .../declarable/helpers/cpu/image_resize.cpp | 2 +- .../ops/declarable/helpers/cpu/lstsq.cpp | 4 +- .../ops/declarable/helpers/cpu/lup.cpp | 34 +- .../ops/declarable/helpers/cpu/merge.cpp | 4 +- .../ops/declarable/helpers/cpu/random.cpp | 14 +- .../declarable/helpers/cpu/randomShuffle.cpp | 8 +- .../ops/declarable/helpers/cpu/segment.cpp | 14 +- .../declarable/helpers/cpu/sequence_mask.cpp | 2 +- .../ops/declarable/helpers/cpu/solve.cpp | 6 +- .../ops/declarable/helpers/cpu/svd.cpp | 912 +----------------- .../ops/declarable/helpers/cpu/top_k.cpp | 16 +- .../helpers/cpu/triangular_solve.cpp | 38 +- .../ops/declarable/helpers/cpu/triu.cpp | 2 +- .../helpers/cuda/triangular_solve.cu | 139 ++- .../helpers/impl/sparse_to_dense.cpp | 1 + .../ops/declarable/helpers/impl/sqrtm.cpp | 66 ++ .../include/ops/declarable/helpers/sqrtm.h | 39 + .../ops/declarable/helpers/triangular_solve.h | 4 +- .../layers_tests/DeclarableOpsTests11.cpp | 86 +- .../layers_tests/DeclarableOpsTests13.cpp | 494 ---------- .../layers_tests/DeclarableOpsTests15.cpp | 75 +- .../layers_tests/DeclarableOpsTests19.cpp | 2 + .../tests_cpu/layers_tests/HelpersTests1.cpp | 868 +++++++---------- .../tests_cpu/layers_tests/HelpersTests2.cpp | 426 ++++++++ .../tests_cpu/layers_tests/NDArrayTests.cpp | 3 + .../tests_cpu/layers_tests/NativeOpsTests.cpp | 2 +- .../layers_tests/PlaygroundTests.cpp | 523 ++++++++++ libnd4j/tests_cpu/layers_tests/RNGTests.cpp | 44 +- .../tests_cpu/libnd4j_tests/CMakeLists.txt | 13 +- 64 files changed, 4310 insertions(+), 3258 deletions(-) create mode 100644 libnd4j/include/helpers/EigenValsAndVecs.h create mode 100644 libnd4j/include/helpers/FullPivLU.h 
create mode 100644 libnd4j/include/helpers/HessenbergAndSchur.h create mode 100644 libnd4j/include/helpers/Sqrtm.h delete mode 100644 libnd4j/include/helpers/cpu/biDiagonalUp.cpp delete mode 100644 libnd4j/include/helpers/cpu/hhColPivQR.cpp delete mode 100644 libnd4j/include/helpers/cpu/householder.cpp create mode 100644 libnd4j/include/helpers/impl/EigenValsAndVecs.cpp create mode 100644 libnd4j/include/helpers/impl/FullPivLU.cpp create mode 100644 libnd4j/include/helpers/impl/HessenbergAndSchur.cpp create mode 100644 libnd4j/include/helpers/impl/Sqrtm.cpp create mode 100644 libnd4j/include/helpers/impl/biDiagonalUp.cpp create mode 100644 libnd4j/include/helpers/impl/hhColPivQR.cpp rename libnd4j/include/helpers/{cpu => impl}/hhSequence.cpp (59%) create mode 100644 libnd4j/include/helpers/impl/householder.cpp rename libnd4j/include/helpers/{cpu => impl}/jacobiSVD.cpp (58%) create mode 100644 libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp rename libnd4j/include/ops/declarable/generic/{blas => linalg}/svd.cpp (100%) create mode 100644 libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp create mode 100644 libnd4j/include/ops/declarable/helpers/sqrtm.h create mode 100644 libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index ae4df227d..04500a987 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -1163,7 +1163,7 @@ namespace sd { /** * fill target matrix with given value in one or two directions from main diagonal: - * - down from main diagonal starting at subdiagonal number "lower" if direction = 'd' (down) or 'b' (both) + * - down from main diagonal starting at subdiagonal number "lower" if direction = 'l' (down) or 'b' (both) * - up from main diagonal starting at superdiagonal number "upper"if direction = 'u' (up) or 'b' (both) * direction - in what direction to fill matrix. 
There are 3 possible directions: * 'u' - fill up, mathematically this corresponds to lower triangular matrix, subdiagonal "lower" unaffected @@ -1230,14 +1230,13 @@ namespace sd { * returns reference on array element with given index */ template - FORCEINLINE T& t(const Nd4jLong index); - + FORCEINLINE T& r(const Nd4jLong index); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k); template - FORCEINLINE T& t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w); + FORCEINLINE T& r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w); /** @@ -1246,7 +1245,6 @@ namespace sd { */ template FORCEINLINE T t(const Nd4jLong i) const; - template FORCEINLINE T t(const Nd4jLong i, const Nd4jLong j) const; template @@ -1778,70 +1776,60 @@ DataType NDArray::dataType() const { //////////////////////////////////////////////////////////////////////// template -T& NDArray::t(const Nd4jLong i) { +T& NDArray::r(const Nd4jLong i) { // if (i >= _length) // throw std::invalid_argument("NDArray::t(i): input index is out of array length !"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - + syncToHost(); tickWriteHost(); + return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } //////////////////////////////////////////////////////////////////////// template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j) { if (rankOf() != 2 || i >= sizeAt(0) || j >= sizeAt(1)) throw std::invalid_argument("NDArray::t(i,j): one of input indexes is out of array length or rank!=2 !"); if (DataTypeUtils::fromT() != _dataType) throw 
std::invalid_argument("NDArray::t(i,j): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1)))); } template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2)))); } template -T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { +T& NDArray::r(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) { if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4 !"); if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); - - Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(shapeInfo(), coords); + syncToHost(); tickWriteHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + + return 
*(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + w * strideAt(3)))); } //////////////////////////////////////////////////////////////////////// @@ -1853,10 +1841,8 @@ T NDArray::t(const Nd4jLong i) const { if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - tickReadHost(); return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } @@ -1869,48 +1855,38 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { if (DataTypeUtils::fromT() != _dataType) throw std::invalid_argument("NDArray::t(i,j): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1)))); } - template - T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { +//////////////////////////////////////////////////////////////////////// +template +T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { - if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) - throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); - if (DataTypeUtils::fromT() != _dataType) - throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); + if (rankOf() != 3 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2)) + throw std::invalid_argument("NDArray::t(i,j,k): one of input indexes is out of array length or rank!=3!"); + if (DataTypeUtils::fromT() != _dataType) + throw std::invalid_argument("NDArray::t(i,j,k): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + 
syncToHost(); - Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); - } + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2)))); +} - template - T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) const { +//////////////////////////////////////////////////////////////////////// +template +T NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong w) const { - if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) - throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4!"); - if (DataTypeUtils::fromT() != _dataType) - throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); + if (rankOf() != 4 || i >= sizeAt(0) || j >= sizeAt(1) || k >= sizeAt(2) || w >= sizeAt(3)) + throw std::invalid_argument("NDArray::t(i,j,k,w): one of input indexes is out of array length or rank!=4!"); + if (DataTypeUtils::fromT() != _dataType) + throw std::invalid_argument("NDArray::t(i,j,k,w): type of array is not equal to template type T!"); - if(!isActualOnHostSide()) - syncToHost(); + syncToHost(); - Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(shapeInfo(), coords); - tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); - } + return *(reinterpret_cast(bufferWithOffset(i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + w * strideAt(3)))); +} #ifndef __JAVACPP_HACK__ //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 786333eec..773d845ab 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -2170,7 +2170,7 @@ const std::string* ND4J_EXPORT NDArray::bufferAsT() const { 
template const T* NDArray::bufferAsT() const { // FIXME: do we REALLY want sync here? - syncToHost(); + // syncToHost(); return reinterpret_cast(buffer()); } @@ -2597,11 +2597,9 @@ void NDArray::operator+=(const T value) { auto other = NDArrayFactory::create(this->dataType(), value, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); - + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator+=(const double value); template ND4J_EXPORT void NDArray::operator+=(const float value); @@ -2619,11 +2617,9 @@ void NDArray::operator-=(const T value) { auto other = NDArrayFactory::create(dataType(), value, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); - + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator-=(const double value); template ND4J_EXPORT void NDArray::operator-=(const float value); @@ -2640,10 +2636,9 @@ void NDArray::operator*=(const T scalar) { throw std::runtime_error("NDArray::operator*=: you can't use this method on String array!"); auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); + NDArray::prepareSpecialUse({this}, {this, 
&other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator*=(const double scalar); template ND4J_EXPORT void NDArray::operator*=(const float scalar); @@ -2663,9 +2658,9 @@ void NDArray::operator/=(const T scalar) { throw std::runtime_error("NDArray::operator/=: you can't use this method on String array!"); auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); - NDArray::prepareSpecialUse({this}, {&other}); + NDArray::prepareSpecialUse({this}, {this, &other}); NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); - NDArray::registerSpecialUse({this}, {}); + NDArray::registerSpecialUse({this}, {this, &other}); } template ND4J_EXPORT void NDArray::operator/=(const double scalar); template ND4J_EXPORT void NDArray::operator/=(const float scalar); @@ -3758,8 +3753,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j) const { if (rankOf() != 2 || i >= shapeOf()[0] || j >= shapeOf()[1]) throw std::invalid_argument("NDArray::e(i,j): one of input indexes is out of array length or rank!=2 !"); - const Nd4jLong coords[2] = {i, j}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -3778,8 +3772,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { if (rankOf() != 3 || i >= 
shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2]) throw std::invalid_argument("NDArray::e(i,j,k): one of input indexes is out of array length or rank!=3 !"); - const Nd4jLong coords[3] = {i, j, k}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -3798,8 +3791,7 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLon if (rankOf() != 4 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2] || l >= shapeOf()[3]) throw std::invalid_argument("NDArray::e(i,j,k,l): one of input indexes is out of array length or rank!=4 !"); - const Nd4jLong coords[4] = {i, j, k, l}; - const auto xOffset = shape::getOffset(shapeInfo(), coords); + const auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + l * strideAt(3); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); @@ -4411,8 +4403,7 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const T value) { throw std::invalid_argument("NDArray:pe(i,j, value): one of input indexes is out of array length or rank!=2 !"); void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[2] = {i, j}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1); NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); @@ -4440,11 +4431,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const T va if (rankOf() != 3 || i >= shapeOf()[0] || j >= shapeOf()[1] || k >= shapeOf()[2]) throw std::invalid_argument("NDArray:pe(i,j,k, value): one of input indexes is out of array length or rank!=3 !"); - NDArray::preparePrimaryUse({this}, {}, true); - void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[3] = {i, j, k}; - 
auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2); + + NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } @@ -4470,8 +4460,7 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4j throw std::invalid_argument("NDArray::p(i,j,k,l, value): one of input indexes is out of array length or rank!=4 !"); void *p = reinterpret_cast(const_cast(&value)); - Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(shapeInfo(), coords); + auto xOffset = i * strideAt(0) + j * strideAt(1) + k * strideAt(2) + l * strideAt(3); NDArray::preparePrimaryUse({this}, {}, true); BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 87369f740..873b3fec9 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -153,21 +153,38 @@ void NDArray::setIdentity() { //////////////////////////////////////////////////////////////////////// template -static void templatedSwap(void *xBuffer, void *yBuffer, Nd4jLong length) { +static void templatedSwap(void *xBuffer, void *yBuffer, const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, Nd4jLong length) { auto x = reinterpret_cast(xBuffer); auto y = reinterpret_cast(yBuffer); + const bool isSameOrders = shape::order(xShapeInfo) == shape::order(xShapeInfo); + + const auto xEws = shape::elementWiseStride(xShapeInfo); + const auto yEws = shape::elementWiseStride(yShapeInfo); + auto func = PRAGMA_THREADS_FOR { - for (auto i = start; i < stop; i++) { - auto temp = x[i]; - x[i] = y[i]; - y[i] = temp; + if(isSameOrders && xEws > 0 && yEws > 0) { + for(auto i = start; i < stop; i++) + sd::math::nd4j_swap(x[i*xEws], 
y[i*yEws]); + } + else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) { + for(auto i = start; i < stop; i++) { + const auto ind = shape::getIndexOffset(i, xShapeInfo); + sd::math::nd4j_swap(x[ind], y[ind]); + } + } + else { + for(auto i = start; i < stop; i++) { + const auto xInd = shape::getIndexOffset(i, xShapeInfo); + const auto yInd = shape::getIndexOffset(i, yShapeInfo); + sd::math::nd4j_swap(x[xInd], y[yInd]); + } } }; samediff::Threads::parallel_for(func, 0, length); } -BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, Nd4jLong length), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, Nd4jLong length), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::swapUnsafe(NDArray& other) { @@ -182,7 +199,7 @@ void NDArray::swapUnsafe(NDArray& other) { if(lengthOf() != other.lengthOf()) throw std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same length!"); - BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), this->lengthOf()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), shapeInfo(), other.shapeInfo(), this->lengthOf()), LIBND4J_TYPES); } //////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/array/cuda/NDArray.cu b/libnd4j/include/array/cuda/NDArray.cu index e33e97c3b..8ed3eceeb 100644 --- a/libnd4j/include/array/cuda/NDArray.cu +++ b/libnd4j/include/array/cuda/NDArray.cu @@ -225,7 +225,13 @@ void NDArray::swapUnsafe(NDArray& other) { if(lengthOf() != other.lengthOf()) throw std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same length!"); + PointersManager manager(getContext(), "NDArray::swapUnsafe"); + + prepareSpecialUse({&other, this}, {&other, this}); BUILD_SINGLE_SELECTOR(xType, templatedSwapUnsafe, 
(specialBuffer(), specialShapeInfo(), other.specialBuffer(), other.specialShapeInfo(), getContext()->getCudaStream()), LIBND4J_TYPES); + registerSpecialUse({&other, this}, {&other, this}); + + manager.synchronize(); } //////////////////////////////////////////////////////////////////////// @@ -546,21 +552,18 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre if(specialBuffer() == nullptr || _length == 0) { printf("NDArray::printSpecialBuffer: special buffer is nullptr !\n"); return; } - void* pHost = operator new(sizeof(T) * _length); + const auto sizeOfBuffer = sizeOfT() * (getOffset(_length - 1) + 1); - if (ews() != 1) { - for (uint i = 0; i < _length; i++) - cudaMemcpyAsync(reinterpret_cast(pHost) + i, specialBufferWithOffset(i), sizeof(T), cudaMemcpyDeviceToHost, *(getContext()->getCudaStream())); - } - else - cudaMemcpyAsync(pHost, specialBuffer(), sizeOfT() * _length, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); + void* pHost = operator new(sizeOfBuffer); + + cudaMemcpyAsync(pHost, specialBuffer(), sizeOfBuffer, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); cudaError_t cudaResult = cudaStreamSynchronize(*getContext()->getCudaStream()); if(cudaResult != 0) throw std::runtime_error("NDArray::printSpecialBuffer: cudaStreamSynchronize failed!"); for (uint i = 0; i < _length; i++) - printf("%.*f, ", precision, (double)reinterpret_cast(pHost)[i]); + printf("%.*f, ", precision, (double)reinterpret_cast(pHost)[getOffset(i)]); printf("\n"); operator delete(pHost); diff --git a/libnd4j/include/helpers/EigenValsAndVecs.h b/libnd4j/include/helpers/EigenValsAndVecs.h new file mode 100644 index 000000000..222b9c36e --- /dev/null +++ b/libnd4j/include/helpers/EigenValsAndVecs.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_EIGENVALSANDVECS_H +#define LIBND4J_EIGENVALSANDVECS_H + +#include + +namespace sd { +namespace ops { +namespace helpers { + +// this class calculates eigenvalues and eigenvectors of given input matrix +template +class EigenValsAndVecs { + + public: + // suppose we got input square NxN matrix + + NDArray _Vals; // {N,2} matrix of eigenvalues, 2 means real and imaginary part + NDArray _Vecs; // {N,N,2} matrix, whose columns are the eigenvectors (complex), 2 means real and imaginary part + + explicit EigenValsAndVecs(const NDArray& matrix); + + + ////////////////////////////////////////////////////////////////////////// + FORCEINLINE static void divideComplexNums(const T& a1, const T& b1, const T& a2, const T& b2, T& a3, T& b3) { + + T norm2 = a2*a2 + b2*b2; + + a3 = (a1*a2 + b1*b2) / norm2; + b3 = (a2*b1 - a1*b2) / norm2; + } + + ////////////////////////////////////////////////////////////////////////// + FORCEINLINE static void multiplyComplexNums(const T& a1, const T& b1, const T& a2, const T& b2, T& a3, T& b3) { + + a3 = (a1*a2 - b1*b2); + b3 = (a1*b2 + b1*a2); + } + + ////////////////////////////////////////////////////////////////////////// + FORCEINLINE static void sqrtComplexNum(T& a, T& b) { + + T norm = math::nd4j_sqrt(a*a + b*b); + + if(b < 
(T)0) + b = -math::nd4j_sqrt((T)0.5 * (norm - a)); + else + b = math::nd4j_sqrt((T)0.5 * (norm - a)); + a = math::nd4j_sqrt((T)0.5 * (norm + a)); + } + + + private: + + void calcEigenVals(const NDArray& schurMatrixT); // calculates _Vals + void calcPseudoEigenVecs(NDArray& schurMatrixT, NDArray& schurMatrixU); // makes changes both in schurMatrixT(NxN) and schurMatrixU(NxN), also calculates and stores pseudo-eigenvectors (real) in schurMatrixU columns + void calcEigenVecs(const NDArray& schurMatrixU); // calculates _Vecs + +}; + + +} +} +} + + +#endif //LIBND4J_EIGENVALSANDVECS_H diff --git a/libnd4j/include/helpers/FullPivLU.h b/libnd4j/include/helpers/FullPivLU.h new file mode 100644 index 000000000..3e285b597 --- /dev/null +++ b/libnd4j/include/helpers/FullPivLU.h @@ -0,0 +1,52 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_FULLPIVLU_H +#define LIBND4J_FULLPIVLU_H + +#include + +namespace sd { +namespace ops { +namespace helpers { + +// class solves equation A*x = b for x, by procedure of LU decomposition of input matrix A with complete pivoting +// LU decomposition of a matrix is: +// A = P^-1 * L * U * Q^-1 +// L is unit-lower-triangular, +// U is upper-triangular, +// and P and Q are permutation matrices for rows and columns correspondingly + +template +class FullPivLU { + + public: + + // A{M,K} * x{K,N} = b{M,N} + static void solve(const NDArray& A, const NDArray& b, NDArray& x); +}; + + +} +} +} + + +#endif //LIBND4J_FULLPIVLU_H diff --git a/libnd4j/include/helpers/HessenbergAndSchur.h b/libnd4j/include/helpers/HessenbergAndSchur.h new file mode 100644 index 000000000..9c209ea56 --- /dev/null +++ b/libnd4j/include/helpers/HessenbergAndSchur.h @@ -0,0 +1,102 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_HESSENBERGANDSCHUR_H +#define LIBND4J_HESSENBERGANDSCHUR_H + +#include + +namespace sd { +namespace ops { +namespace helpers { + +// this class implements Hessenberg decomposition of square matrix using orthogonal similarity transformation +// A = Q H Q^T +// Q - orthogonal matrix +// H - Hessenberg matrix +template +class Hessenberg { + // suppose we got input square NxN matrix + + public: + + NDArray _Q; // {N,N} + NDArray _H; // {N,N} + + explicit Hessenberg(const NDArray& matrix); + + private: + void evalData(); +}; + + +// this class implements real Schur decomposition of square matrix using orthogonal similarity transformation +// A = U T U^T +// T - real quasi-upper-triangular matrix - block upper triangular matrix where the blocks on the diagonal are 1×1 or 2×2 with complex eigenvalues +// U - real orthogonal matrix + +template +class Schur { + // suppose we got input square NxN matrix + + public: + + NDArray _T; // {N,N} + NDArray _U; // {N,N} + + explicit Schur(const NDArray& matrix); + + void splitTwoRows(const int ind, const T shift); + + void calcShift(const int ind, const int iter, T& shift, NDArray& shiftInfo); + + void initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec); + + void doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec); + + void calcFromHessenberg(); + + private: + + static const int _maxItersPerRow = 40; + + void evalData(const NDArray& matrix); + + ////////////////////////////////////////////////////////////////////////// + FORCEINLINE int getSmallSubdiagEntry(const int inInd) { + + int outInd = inInd; + while (outInd > 0) { + T factor = math::nd4j_abs(_T.t(outInd-1, outInd-1)) + math::nd4j_abs(_T.t(outInd, outInd)); + if 
(math::nd4j_abs(_T.t(outInd, outInd-1)) <= DataTypeUtils::eps() * factor) + break; + outInd--; + } + return outInd; + } +}; + + +} +} +} + + +#endif //LIBND4J_HESSENBERGANDSCHUR_H diff --git a/libnd4j/include/helpers/Sqrtm.h b/libnd4j/include/helpers/Sqrtm.h new file mode 100644 index 000000000..1968bc7a5 --- /dev/null +++ b/libnd4j/include/helpers/Sqrtm.h @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_SQRTM_H +#define LIBND4J_SQRTM_H + +#include + +namespace sd { +namespace ops { +namespace helpers { + +template +class Sqrtm { + + + public: + + static void calc(const NDArray& in, NDArray& out); +}; + + +} +} +} + + +#endif //LIBND4J_SQRTM_H diff --git a/libnd4j/include/helpers/biDiagonalUp.h b/libnd4j/include/helpers/biDiagonalUp.h index aaf64d41d..dc44057a9 100644 --- a/libnd4j/include/helpers/biDiagonalUp.h +++ b/libnd4j/include/helpers/biDiagonalUp.h @@ -32,13 +32,14 @@ namespace helpers { class BiDiagonalUp { public: - + NDArray _HHmatrix; // 2D Householder matrix NDArray _HHbidiag; // vector which contains Householder coefficients + NDArray _hhCoeffs; // vector of Householder coefficients /** * constructor - * + * * matrix - input matrix expected to be bi-diagonalized, remains unaffected */ BiDiagonalUp(const NDArray& matrix); @@ -47,7 +48,7 @@ class BiDiagonalUp { * this method evaluates data (coeff, normX, tail) used in Householder transformation * formula for Householder matrix: P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...], "tail" is w except first unity element, that is "tail" = [w1, w2, w3, ...] 
* tail and coeff are stored in _HHmatrix * normX are stored in _HHbidiag @@ -59,13 +60,13 @@ class BiDiagonalUp { /** * this method evaluates product of Householder sequence matrices (transformations) acting on columns - * + * * type - type of sequence, type = 'u' (acting on columns) or type = 'v' (acting on rows) */ template - HHsequence makeHHsequence_(const char type) const; + HHsequence makeHHsequence_(const char type); - HHsequence makeHHsequence(const char type) const; + HHsequence makeHHsequence(const char type); }; diff --git a/libnd4j/include/helpers/cpu/biDiagonalUp.cpp b/libnd4j/include/helpers/cpu/biDiagonalUp.cpp deleted file mode 100644 index 4623a93ad..000000000 --- a/libnd4j/include/helpers/cpu/biDiagonalUp.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 18.12.2017 -// - - -#include -#include -#include - - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(sd::NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(0), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())), - _HHbidiag(sd::NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())) { - - // input validation - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::biDiagonalizeUp constructor: input array must be 2D matrix !"); - - _HHmatrix.assign(&matrix); - _HHbidiag.assign(0.); - - evalData(); - -} - - template - void BiDiagonalUp::_evalData() { - - const auto rows = _HHmatrix.sizeAt(0); - const auto cols = _HHmatrix.sizeAt(1); - - if(rows < cols) - throw std::runtime_error("ops::helpers::BiDiagonalizeUp::evalData method: this procedure is applicable only for input matrix with rows >= cols !"); - - NDArray* bottomRightCorner(nullptr), *column(nullptr), *row(nullptr); - T coeff, normX; - - T _x, _y; - - for(Nd4jLong i = 0; i < cols-1; ++i ) { - - // evaluate Householder matrix nullifying columns - column = new NDArray(_HHmatrix({i,rows, i,i+1}, true)); - - _x = _HHmatrix.e(i,i); - _y = _HHbidiag.e(i,i); - - Householder::evalHHmatrixDataI(*column, _x, _y); - - _HHmatrix.p(i, i, _x); - _HHbidiag.p(i, i, _y); - - // multiply corresponding matrix block on householder matrix from the left: P * bottomRightCorner - bottomRightCorner = new NDArray(_HHmatrix({i,rows, i+1,cols}, true)); // {i, cols} - Householder::mulLeft(*bottomRightCorner, _HHmatrix({i+1,rows, i,i+1}, true), _HHmatrix.e(i,i)); - - delete bottomRightCorner; - delete column; - - if(i == 
cols-2) - continue; // do not apply right multiplying at last iteration - - // evaluate Householder matrix nullifying rows - row = new NDArray(_HHmatrix({i,i+1, i+1,cols}, true)); - - _x = _HHmatrix.e(i,i+1); - _y = _HHbidiag.e(i,i+1); - - Householder::evalHHmatrixDataI(*row, _x, _y); - - _HHmatrix.p(i, i+1, _x); - _HHbidiag.p(i, i+1, _y); - - // multiply corresponding matrix block on householder matrix from the right: bottomRightCorner * P - bottomRightCorner = new NDArray(_HHmatrix({i+1,rows, i+1,cols}, true)); // {i, rows} - - Householder::mulRight(*bottomRightCorner, _HHmatrix({i,i+1, i+2,cols}, true), _HHmatrix.e(i,i+1)); - - delete bottomRightCorner; - delete row; - } - - row = new NDArray(_HHmatrix({cols-2,cols-1, cols-1,cols}, true)); - - _x = _HHmatrix.e(cols-2,cols-1); - _y = _HHbidiag.e(cols-2,cols-1); - - Householder::evalHHmatrixDataI(*row, _x, _y); - - _HHmatrix.p(cols-2,cols-1, _x); - _HHbidiag.p(cols-2,cols-1, _y); - - delete row; - - column = new NDArray(_HHmatrix({cols-1,rows, cols-1,cols}, true)); - - _x = _HHmatrix.e(cols-1,cols-1); - _y = _HHbidiag.e(cols-1,cols-1); - - Householder::evalHHmatrixDataI(*column, _x, _y); - - _HHmatrix.p(cols-1, cols-1, _x); - _HHbidiag.p(cols-1, cols-1, _y); - - delete column; - } - -////////////////////////////////////////////////////////////////////////// -void BiDiagonalUp::evalData() { - auto xType = _HHmatrix.dataType(); - - BUILD_SINGLE_SELECTOR(xType, _evalData, ();, FLOAT_TYPES); -} - - -////////////////////////////////////////////////////////////////////////// -template -HHsequence BiDiagonalUp::makeHHsequence_(const char type) const { - - if(type == 'u') { - - const int diagSize = _HHbidiag.sizeAt(0); - auto colOfCoeffs = NDArrayFactory::create(_HHmatrix.ordering(), {diagSize, 1}, _HHmatrix.dataType(), _HHmatrix.getContext()); - - for(int i = 0; i < diagSize; ++i) - colOfCoeffs.p(i, _HHmatrix.e(i,i)); - - return HHsequence(_HHmatrix, colOfCoeffs, type); - } - else { - - const int diagUpSize = 
_HHbidiag.sizeAt(0) - 1; - NDArray colOfCoeffs = NDArrayFactory::create(_HHmatrix.ordering(), {diagUpSize, 1}, _HHmatrix.dataType(), _HHmatrix.getContext()); - - for(int i = 0; i < diagUpSize; ++i) - colOfCoeffs.p(i, _HHmatrix.e(i,i+1)); - - HHsequence result(_HHmatrix, colOfCoeffs, type); - result._diagSize = diagUpSize; - result._shift = 1; - - return result; - } -} - - HHsequence BiDiagonalUp::makeHHsequence(const char type) const { - auto xType = _HHmatrix.dataType(); - - BUILD_SINGLE_SELECTOR(xType, return makeHHsequence_, (type);, FLOAT_TYPES); - } - - - -BUILD_SINGLE_TEMPLATE(template void BiDiagonalUp::_evalData, (), FLOAT_TYPES); -BUILD_SINGLE_TEMPLATE(template HHsequence BiDiagonalUp::makeHHsequence_, (const char type) const, FLOAT_TYPES); - -} -} -} \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/hhColPivQR.cpp b/libnd4j/include/helpers/cpu/hhColPivQR.cpp deleted file mode 100644 index e118b0bf1..000000000 --- a/libnd4j/include/helpers/cpu/hhColPivQR.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 11.01.2018 -// - -#include -#include -#include - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -HHcolPivQR::HHcolPivQR(const NDArray& matrix) { - - _qr = matrix; - _diagSize = math::nd4j_min(matrix.sizeAt(0), matrix.sizeAt(1)); - _coeffs = NDArrayFactory::create(matrix.ordering(), {1, _diagSize}, matrix.dataType(), matrix.getContext()); - - _permut = NDArrayFactory::create(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext()); - - evalData(); -} - - void HHcolPivQR::evalData() { - BUILD_SINGLE_SELECTOR(_qr.dataType(), _evalData, (), FLOAT_TYPES); - } - -////////////////////////////////////////////////////////////////////////// -template -void HHcolPivQR::_evalData() { - - int rows = _qr.sizeAt(0); - int cols = _qr.sizeAt(1); - - auto transp = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - auto normsUpd = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - auto normsDir = NDArrayFactory::create(_qr.ordering(), {1, cols}, _qr.dataType(), _qr.getContext()); - - int transpNum = 0; - - for (int k = 0; k < cols; ++k) { - - T norm = _qr({0,0, k,k+1}).reduceNumber(reduce::Norm2).e(0); - normsDir.p(k, norm); - normsUpd.p(k, norm); - } - - T normScaled = (normsUpd.reduceNumber(reduce::Max)).e(0) * DataTypeUtils::eps(); - T threshold1 = normScaled * normScaled / (T)rows; - T threshold2 = math::nd4j_sqrt(DataTypeUtils::eps()); - - T nonZeroPivots = _diagSize; - T maxPivot = 0.; - - for(int k = 0; k < _diagSize; ++k) { - - int biggestColIndex = normsUpd({0,0, k,-1}).indexReduceNumber(indexreduce::IndexMax).e(0); - T biggestColNorm = normsUpd({0,0, k,-1}).reduceNumber(reduce::Max).e(0); - T biggestColSqNorm = 
biggestColNorm * biggestColNorm; - biggestColIndex += k; - - if(nonZeroPivots == (T)_diagSize && biggestColSqNorm < threshold1 * (T)(rows-k)) - nonZeroPivots = k; - - transp.p(k, (T)biggestColIndex); - - if(k != biggestColIndex) { - - auto temp1 = new NDArray(_qr({0,0, k,k+1}, true)); - auto temp2 = new NDArray(_qr({0,0, biggestColIndex,biggestColIndex+1}, true)); - auto temp3 = *temp1; - temp1->assign(temp2); - temp2->assign(temp3); - delete temp1; - delete temp2; - - T e0 = normsUpd.e(k); - T e1 = normsUpd.e(biggestColIndex); - normsUpd.p(k, e1); - normsUpd.p(biggestColIndex, e0); - //math::nd4j_swap(normsUpd(k), normsUpd(biggestColIndex)); - - e0 = normsDir.e(k); - e1 = normsDir.e(biggestColIndex); - normsDir.p(k, e1); - normsDir.p(biggestColIndex, e0); - //math::nd4j_swap(normsDir(k), normsDir(biggestColIndex)); - - ++transpNum; - } - - T normX; - NDArray* qrBlock = new NDArray(_qr({k,rows, k,k+1}, true)); - T c; - Householder::evalHHmatrixDataI(*qrBlock, c, normX); - _coeffs.p(k, c); - delete qrBlock; - - _qr.p(k,k, normX); - - T max = math::nd4j_abs(normX); - if(max > maxPivot) - maxPivot = max; - - if(k < rows && (k+1) < cols) { - qrBlock = new NDArray(_qr({k, rows, k+1,cols}, true)); - auto tail = new NDArray(_qr({k+1,rows, k, k+1}, true)); - Householder::mulLeft(*qrBlock, *tail, _coeffs.e(k)); - delete qrBlock; - delete tail; - } - - for (int j = k + 1; j < cols; ++j) { - - if (normsUpd.e(j) != (T)0.f) { - T temp = math::nd4j_abs(_qr.e(k, j)) / normsUpd.e(j); - temp = (1. + temp) * (1. - temp); - temp = temp < (T)0. ? (T)0. 
: temp; - T temp2 = temp * normsUpd.e(j) * normsUpd.e(j) / (normsDir.e(j)*normsDir.e(j)); - - if (temp2 <= threshold2) { - if(k+1 < rows && j < cols) - normsDir.p(j, _qr({k+1,rows, j,j+1}).reduceNumber(reduce::Norm2).e(0)); - - normsUpd.p(j, normsDir.e(j)); - } - else - normsUpd.p(j, normsUpd.e(j) * math::nd4j_sqrt(temp)); - } - } - } - - _permut.setIdentity(); - - for(int k = 0; k < _diagSize; ++k) { - - int idx = transp.e(k); - auto temp1 = new NDArray(_permut({0,0, k, k+1}, true)); - auto temp2 = new NDArray(_permut({0,0, idx,idx+1}, true)); - auto temp3 = *temp1; - temp1->assign(temp2); - temp2->assign(temp3); - delete temp1; - delete temp2; - } -} - - BUILD_SINGLE_TEMPLATE(template void HHcolPivQR::_evalData, (), FLOAT_TYPES); - -} -} -} - diff --git a/libnd4j/include/helpers/cpu/householder.cpp b/libnd4j/include/helpers/cpu/householder.cpp deleted file mode 100644 index 69d4ca3db..000000000 --- a/libnd4j/include/helpers/cpu/householder.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by Yurii Shyrma on 18.12.2017 -// - -#include -#include - -namespace sd { -namespace ops { -namespace helpers { - - -////////////////////////////////////////////////////////////////////////// -template -NDArray Householder::evalHHmatrix(const NDArray& x) { - - // input validation - if(!x.isVector() && !x.isScalar()) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrix method: input array must be vector or scalar!"); - - auto w = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), 1}, x.dataType(), x.getContext()); // column-vector - auto wT = NDArrayFactory::create(x.ordering(), {1, (int)x.lengthOf()}, x.dataType(), x.getContext()); // row-vector (transposed w) - - T coeff; - T normX = x.reduceNumber(reduce::Norm2).e(0); - - if(normX*normX - x.e(0) * x.e(0) <= DataTypeUtils::min() || x.lengthOf() == 1) { - - normX = x.e(0); - coeff = 0.f; - w = 0.f; - - } - else { - - if(x.e(0) >= (T)0.f) - normX = -normX; // choose opposite sign to lessen roundoff error - - T u0 = x.e(0) - normX; - coeff = -u0 / normX; - w.assign(x / u0); - } - - w.p(Nd4jLong(0), 1.f); - wT.assign(&w); - - NDArray identity = NDArrayFactory::create(x.ordering(), {(int)x.lengthOf(), (int)x.lengthOf()}, x.dataType(), x.getContext()); - identity.setIdentity(); // identity matrix - - return identity - mmul(w, wT) * coeff; -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX) { - - // input validation - if(!x.isVector() && !x.isScalar()) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input array must be vector or scalar!"); - - if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1) - throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input tail vector must 
have length less than unity compared to input x vector!"); - - normX = x.reduceNumber(reduce::Norm2, nullptr).e(0); - - if(normX*normX - x.e(0) * x.e(0) <= DataTypeUtils::min() || x.lengthOf() == 1) { - - normX = x.e(0); - coeff = (T)0.f; - tail = (T)0.f; - } - else { - - if(x.e(0) >= (T)0.f) - normX = -normX; // choose opposite sign to lessen roundoff error - - T u0 = x.e(0) - normX; - coeff = -u0 / normX; - - if(x.isRowVector()) - tail.assign(static_cast(x({0,0, 1,-1})) / u0); - else - tail.assign(static_cast(x({1,-1, 0,0,})) / u0); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::evalHHmatrixDataI(const NDArray& x, T& coeff, T& normX) { - - int rows = (int)x.lengthOf()-1; - int num = 1; - - if(rows == 0) { - rows = 1; - num = 0; - } - - auto tail = NDArrayFactory::create(x.ordering(), {rows, 1}, x.dataType(), x.getContext()); - evalHHmatrixData(x, tail, coeff, normX); - - if(x.isRowVector()) { - auto temp = x({0,0, num, x.sizeAt(1)}, true); - temp.assign(tail); - } - else { - auto temp = x({num,x.sizeAt(0), 0,0}, true); - temp.assign(tail); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void Householder::mulLeft(NDArray& matrix, const NDArray& tail, const T coeff) { - - // if(matrix.rankOf() != 2) - // throw "ops::helpers::Householder::mulLeft method: input array must be 2D matrix !"; - - if(matrix.sizeAt(0) == 1) { - matrix *= (T) 1.f - coeff; - } - else if(coeff != (T)0.f) { - - auto bottomPart = new NDArray(matrix({1,matrix.sizeAt(0), 0,0}, true)); - auto bottomPartCopy = *bottomPart; - - if(tail.isColumnVector()) { - - auto column = tail; - auto row = tail.transpose(); - auto resultingRow = mmul(row, bottomPartCopy); - auto fistRow = matrix({0,1, 0,0}, true); - resultingRow += fistRow; - fistRow -= resultingRow * coeff; - *bottomPart -= mmul(column, resultingRow) * coeff; - } - else { - - auto row = tail; - auto column = tail.transpose(); - auto 
resultingRow = mmul(row, bottomPartCopy); - auto fistRow = matrix({0,1, 0,0}, true); - resultingRow += fistRow; - fistRow -= resultingRow * coeff; - *bottomPart -= mmul(column, resultingRow) * coeff; - } - delete bottomPart; - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void Householder::mulRight(NDArray& matrix, const NDArray& tail, const T coeff) { - - // if(matrix.rankOf() != 2) - // throw "ops::helpers::Householder::mulRight method: input array must be 2D matrix !"; - - if(matrix.sizeAt(1) == 1) - matrix *= (T)1.f - coeff; - - else if(coeff != (T)0.f) { - - auto rightPart = new NDArray(matrix({0,0, 1,matrix.sizeAt(1)}, true)); - auto rightPartCopy = *rightPart; - auto fistCol = new NDArray(matrix({0,0, 0,1}, true)); - - if(tail.isColumnVector()) { - - auto column = tail; - auto row = tail.transpose(); - auto resultingCol = mmul(rightPartCopy, column); - resultingCol += *fistCol; - *fistCol -= resultingCol * coeff; - *rightPart -= mmul(resultingCol, row) * coeff; - } - else { - - auto row = tail; - auto column = tail.transpose(); - auto resultingCol = mmul(rightPartCopy, column); - resultingCol += *fistCol; - *fistCol -= resultingCol * coeff; - *rightPart -= mmul(resultingCol, row) * coeff; - } - delete rightPart; - delete fistCol; - } -} - - -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; -template class ND4J_EXPORT Householder; - - - - - - - -} -} -} diff --git a/libnd4j/include/helpers/cpu/svd.cpp b/libnd4j/include/helpers/cpu/svd.cpp index 4e257b267..8a320f6de 100644 --- a/libnd4j/include/helpers/cpu/svd.cpp +++ b/libnd4j/include/helpers/cpu/svd.cpp @@ -22,7 +22,6 @@ #include #include #include -#include namespace sd { @@ -59,19 +58,19 @@ SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const if (_transp) math::nd4j_swap(_calcU, _calcV); - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, 
matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.dataType(), matrix.getContext()); + // _m.assign(0.); if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); + _u = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); + _u = NDArray(matrix.ordering(), {2, _diagSize + 1}, matrix.dataType(), matrix.getContext()); + // _u.assign(0.); if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); + _v = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + // _v.assign(0.); } evalData(matrix); @@ -106,19 +105,19 @@ SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const if (_transp) math::nd4j_swap(_calcU, _calcV); - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.f); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.dataType(), matrix.getContext()); + // _m.assign(0.f); if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); + _u = NDArray(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); + _u = 
NDArray(matrix.ordering(), {2, _diagSize + 1}, matrix.dataType(), matrix.getContext()); + // _u.assign(0.); if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); + _v = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + // _v.assign(0.); } } @@ -131,28 +130,27 @@ void SVD::deflation1(int col1, int shift, int ind, int size) { throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !"); int first = col1 + shift; - T cos = _m.e(first, first); - T sin = _m.e(first+ind, first); + T cos = _m.t(first, first); + T sin = _m.t(first+ind, first); T denom = math::nd4j_sqrt(cos*cos + sin*sin); if (denom == (T)0.) { - - _m.p(first+ind, first+ind, 0.f); + _m.r(first+ind, first+ind) = (T)0; return; } cos /= denom; sin /= denom; - _m.p(first,first, denom); - _m.p(first+ind, first, 0.f); - _m.p(first+ind, first+ind, 0.f); + _m.r(first,first) = denom; + _m.r(first+ind, first) = (T)0; + _m.r(first+ind, first+ind) = (T)0; - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0, 0, cos); - rotation.p(0, 1, -sin); - rotation.p(1, 0, sin); - rotation.p(1, 1, cos); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + + rotation.r(0,0) = rotation.r(1,1) = cos; + rotation.r(0,1) = -sin; + rotation.r(1,0) = sin; if (_calcU) { auto temp = _u({col1,col1+size+1, 0,0}, true); @@ -172,28 +170,26 @@ void SVD::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, i if(size <= 0) throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !"); - T cos = _m.e(col1M+ind1, col1M); - T sin = _m.e(col1M+ind2, col1M); + T cos = _m.t(col1M+ind1, col1M); + T sin = _m.t(col1M+ind2, col1M); T denom = math::nd4j_sqrt(cos*cos + sin*sin); if (denom == (T)0.) 
{ - - _m.p(col1M + ind1, col1M + ind1, _m.e(col1M + ind2, col1M + ind2)); + _m.r(col1M+ind1, col1M+ind1) = _m.t(col1M+ind2, col1M+ind2); return; } cos /= denom; sin /= denom; - _m.p(col1M + ind1, col1M, denom); - _m.p(col1M + ind2, col1M + ind2, _m.e(col1M + ind1, col1M + ind1)); - _m.p(col1M + ind2, col1M, 0.f); + _m.r(col1M+ind1, col1M) = denom; + _m.r(col1M+ind2, col1M+ind2) = _m.t(col1M+ind1, col1M+ind1); + _m.r(col1M+ind2, col1M) = (T)0; - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0,0, cos); - rotation.p(1,1, cos); + NDArray rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - rotation.p(0,1, -sin); - rotation.p(1,0, sin); + rotation.r(0,0) = rotation.r(1,1) = cos; + rotation.r(0,1) = -sin; + rotation.r(1,0) = sin; if (_calcU) { auto temp = _u({col1U,col1U+size+1, 0,0}, true); @@ -216,40 +212,40 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh const int len = col2 + 1 - col1; - auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true)); + NDArray colVec0 = _m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true); - auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); + NDArray diagInterval = _m({col1+shift,col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); const T almostZero = DataTypeUtils::min(); T maxElem; if(len == 1) - maxElem = math::nd4j_abs(diagInterval.template e(0)); + maxElem = math::nd4j_abs(diagInterval.template t(0)); else - maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e(0); - T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e(0); + maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template t(0); + T maxElem0 = colVec0.reduceNumber(reduce::AMax).template t(0); T eps = math::nd4j_max(almostZero, DataTypeUtils::eps() * maxElem); T epsBig = (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(maxElem0, maxElem); - if(diagInterval.template e(0) < epsBig) - diagInterval.p(Nd4jLong(0), epsBig); + if(diagInterval.template t(0) < epsBig) + diagInterval.r(0) = epsBig; for(int i=1; i < len; ++i) - if(math::nd4j_abs(colVec0->template e(i)) < eps) - colVec0->p(i, 0.f); + if(math::nd4j_abs(colVec0.template t(i)) < eps) + colVec0.r(i) = (T)0; for(int i=1; i < len; i++) - if(diagInterval.template e(i) < epsBig) { + if(diagInterval.template t(i) < epsBig) { deflation1(col1, shift, i, len); for(int i = 0; i < len; ++i) - diagInterval.p(i, _m.e(col1+shift+i,col1+shift+i)); + diagInterval.r(i) = _m.t(col1+shift+i,col1+shift+i); } { bool totDefl = true; for(int i=1; i < len; i++) - if(colVec0->template e(i) >= almostZero) { + if(colVec0.template t(i) >= almostZero) { totDefl = false; break; } @@ -261,7 +257,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh int p = 1; for(int i=1; i(diagInterval.template e(i)) < almostZero) + if(math::nd4j_abs(diagInterval.template t(i)) < almostZero) permut[p++] = i; int k = 1, m = ind+1; @@ -271,7 +267,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh permut[p] = m++; else if(m >= len) permut[p] = k++; - else if(diagInterval.template e(k) < diagInterval.template e(m)) + else if(diagInterval.template t(k) < diagInterval.template t(m)) permut[p] = m++; else permut[p] = k++; @@ -281,7 +277,7 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh if(totDefl) { for(int i=1; i(diagInterval.template e(ki)) < almostZero || diagInterval.template e(0) < diagInterval.template e(ki)) + if(math::nd4j_abs(diagInterval.template t(ki)) < almostZero || diagInterval.template t(0) < diagInterval.template t(ki)) permut[i-1] = permut[i]; else { permut[i-1] = 0; @@ -303,39 +299,26 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh const int ki = permut[len - (totDefl ? 
i+1 : i)]; const int jac = tCol[ki]; - T _e0 = diagInterval.template e(jac); - //math::nd4j_swap(diagInterval)(i), (*diagInterval)(jac)); - diagInterval.p(jac, diagInterval.template e(i)); - diagInterval.p(i, _e0); + math::nd4j_swap(diagInterval.r(i), diagInterval.r(jac)); - if(i!=0 && jac!=0) { - _e0 = colVec0->template e(jac); - //math::nd4j_swap((*colVec0)(i), (*colVec0)(jac)); - colVec0->p(jac, colVec0->template e(i)); - colVec0->p(i, _e0); - } + if(i!=0 && jac!=0) + math::nd4j_swap(colVec0.r(i), colVec0.r(jac)); if (_calcU) { - auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true); - auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}); + auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}); + temp1.swapUnsafe(temp2); } else { - auto temp1 = _u({0,2, col1+i, col1+i+1}, true); - auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _u({0,2, col1+i, col1+i+1}); + auto temp2 = _u({0,2, col1+jac, col1+jac+1}); + temp1.swapUnsafe(temp2); } if(_calcV) { - auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true); - auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}); + auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}); + temp1.swapUnsafe(temp2); } const int tI = tInd[i]; @@ -351,19 +334,17 @@ void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int sh { int i = len-1; - while(i > 0 && (math::nd4j_abs(diagInterval.template e(i)) < almostZero || math::nd4j_abs(colVec0->template e(i)) < almostZero)) + while(i > 0 && (math::nd4j_abs(diagInterval.template t(i)) < almostZero || math::nd4j_abs(colVec0.template t(i)) < almostZero)) --i; for(; i > 1; --i) { - if( (diagInterval.template e(i) 
- diagInterval.template e(i-1)) < DataTypeUtils::eps()*maxElem ) { - if (math::nd4j_abs(diagInterval.template e(i) - diagInterval.template e(i-1)) >= epsBig) + if( (diagInterval.template t(i) - diagInterval.template t(i-1)) < DataTypeUtils::eps()*maxElem ) { + if (math::nd4j_abs(diagInterval.template t(i) - diagInterval.template t(i-1)) >= epsBig) throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !"); deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len); } } } - - delete colVec0; } @@ -374,10 +355,10 @@ T SVD::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons auto len = permut.lengthOf(); T res = 1.; T item; - for(Nd4jLong i=0; i(i); - item = col0.e(j) / ((diagShifted.e(j) - diff) * (diag.e(j) + shift + diff)); - res += item * col0.e(j); + for(int i=0; i(i); + item = col0.t(j) / ((diagShifted.t(j) - diff) * (diag.t(j) + shift + diff)); + res += item * col0.t(j); } return res; @@ -390,34 +371,34 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra auto len = col0.lengthOf(); auto curLen = len; - while(curLen > 1 && col0.e(curLen-1) == (T)0.f) + while(curLen > 1 && col0.t(curLen-1) == (T)0.f) --curLen; for (Nd4jLong k = 0; k < len; ++k) { - if (col0.e(k) == (T)0.f || curLen==1) { + if (col0.t(k) == (T)0.f || curLen==1) { - singVals.p(k, k==0 ? col0.e(0) : diag.e(k)); - mus.p(k, 0.f); - shifts.p(k, k==0 ? col0.e(0) : diag.e(k)); + singVals.r(k) = k==0 ? col0.t(0) : diag.t(k); + mus.r(k) = (T)0; + shifts.r(k) = k==0 ? 
col0.t(0) : diag.t(k); continue; } - T left = diag.e(k); + T left = diag.t(k); T right; if(k==curLen-1) - right = diag.e(curLen-1) + col0.reduceNumber(reduce::Norm2).e(0); + right = diag.t(curLen-1) + col0.reduceNumber(reduce::Norm2).t(0); else { int l = k+1; - while(col0.e(l) == (T)0.f) { + while(col0.t(l) == (T)0.f) { ++l; if(l >= curLen) throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !"); } - right = diag.e(l); + right = diag.t(l); } T mid = left + (right - left) / (T)2.; @@ -440,7 +421,7 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra } T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift); - T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); + T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); if (math::nd4j_abs(fPrev) < math::nd4j_abs(fCur)) { math::nd4j_swap(fPrev, fCur); @@ -464,13 +445,12 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra if (shift == left && (muCur < (T)0. || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) + else if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) useBisection = true; - if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) + else if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) useBisection = true; } - if (useBisection) { T leftShifted, rightShifted; @@ -479,7 +459,6 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra rightShifted = (k==curLen-1) ? 
right : ((right - left) * (T)0.6); } else { - leftShifted = -(right - left) * (T)0.6; rightShifted = -DataTypeUtils::min(); } @@ -502,14 +481,12 @@ void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra } muCur = (leftShifted + rightShifted) / (T)2.; } - singVals.p(k, shift + muCur); - shifts.p(k, shift); - mus.p(k, muCur); + singVals.r(k) = shift + muCur; + shifts.r(k) = shift; + mus.r(k) = muCur; } - } - ////////////////////////////////////////////////////////////////////////// template void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) { @@ -517,29 +494,29 @@ void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& pe int n = col0.lengthOf(); int m = permut.lengthOf(); if(m==0) { - zhat.assign(0.); + zhat.nullify(); return; } - int last = permut.e(m-1); + int last = permut.t(m-1); for (int k = 0; k < n; ++k) { - if (col0.e(k) == (T)0.f) - zhat.p(k, (T)0.f); + if (col0.t(k) == (T)0.f) + zhat.r(k) = (T)0; else { - T dk = diag.e(k); - T prod = (singVals.e(last) + dk) * (mus.e(last) + (shifts.e(last) - dk)); + T dk = diag.t(k); + T prod = (singVals.t(last) + dk) * (mus.t(last) + (shifts.t(last) - dk)); for(int l = 0; l(l); + int i = (int)permut.t(l); if(i!=k) { - int j = i(l-1); - prod *= ((singVals.e(j)+dk) / ((diag.e(i)+dk))) * ((mus.e(j)+(shifts.e(j)-dk)) / ((diag.e(i)-dk))); + int j = i(l-1); + prod *= ((singVals.t(j)+dk) / ((diag.t(i)+dk))) * ((mus.t(j)+(shifts.t(j)-dk)) / ((diag.t(i)-dk))); } } T tmp = math::nd4j_sqrt(prod); - zhat.p(k, col0.e(k) > (T)0.f ? tmp : -tmp); + zhat.r(k) = col0.t(k) > (T)0 ? 
tmp : -tmp; } } } @@ -555,48 +532,46 @@ void SVD::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArra for (int k = 0; k < n; ++k) { - auto colU = new NDArray(U({0,0, k,k+1}, true)); - *colU = 0.; - NDArray* colV = nullptr; + NDArray colU = U({0,0, k,k+1}); + colU.nullify(); + + NDArray colV; if (_calcV) { - colV = new NDArray(V({0,0, k,k+1}, true)); - *colV = 0.; + colV = V({0,0, k,k+1}); + colV.nullify(); } - if (zhat.e(k) == (T)0.f) { - colU->p(k, 1.f); + if (zhat.t(k) == (T)0.f) { + colU.r(k) = (T)1; if (_calcV) - colV->p(k, 1.f); + colV.r(k) = (T)1; } else { for(int l = 0; l < m; ++l) { - int i = perm.e(l); - U.p(i,k, zhat.e(i)/(((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); + int i = (int)perm.t(l); + U.r(i,k) = zhat.t(i)/(((diag.t(i) - shifts.t(k)) - mus.t(k)) )/( (diag.t(i) + singVals.t(k))); } - U.p(n,k, 0.f); - *colU /= colU->reduceNumber(reduce::Norm2); + U.r(n,k) = (T)0; + colU /= colU.reduceNumber(reduce::Norm2); if (_calcV) { for(int l = 1; l < m; ++l){ - int i = perm.e(l); - V.p(i,k, diag.e(i) * zhat.e(i) / (((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); + int i = perm.t(l); + V.r(i,k) = diag.t(i) * zhat.t(i) / (((diag.t(i) - shifts.t(k)) - mus.t(k)) )/( (diag.t(i) + singVals.t(k))); } - V.p(0,k, -1.f); - *colV /= colV->reduceNumber(reduce::Norm2); + V.r(0,k) = (T)-1; + colV /= colV.reduceNumber(reduce::Norm2); } } - delete colU; - if (_calcV) - delete colV; } - auto colU = U({0,0, n,n+1}, true); - colU = 0.; - colU.p(n, 1.); + NDArray colU = U({0,0, n,n+1}); + colU.nullify(); + colU.r(n) = (T)1; } @@ -608,26 +583,29 @@ void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA auto col0 = _m({col1, col1+size, col1, col1+1}, true); auto diag = static_cast(_m({col1, col1+size, col1, col1+size}, true).diagonal('c')); - diag.p(Nd4jLong(0), T(0)); - singVals = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - U = 
NDArrayFactory::create(_u.ordering(), {size+1, size+1}, _u.getContext()); + diag.r(0) = (T)0; + singVals = NDArray(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + U = NDArray(_u.ordering(), {size+1, size+1}, _u.dataType(), _u.getContext()); if (_calcV) - V = NDArrayFactory::create(_v.ordering(), {size, size}, _v.getContext()); + V = NDArray(_v.ordering(), {size, size}, _v.dataType(), _v.getContext()); int curSize = size; - while(curSize > 1 && diag.template e(curSize-1) == (T)0.f) + while(curSize > 1 && diag.template t(curSize-1) == (T)0.f) --curSize; int m = 0; - std::vector indices; + std::vector indices; for(int k = 0; k < curSize; ++k) - if(math::nd4j_abs(col0.template e(k)) > almostZero) - indices.push_back((T)k); + if(math::nd4j_abs(col0.template t(k)) > almostZero) + indices.push_back(k); - auto permut = NDArrayFactory::create(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext()); - auto shifts = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto mus = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto zhat = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); + NDArray permut(_m.ordering(), {(int)indices.size()}, _m.dataType(), _m.getContext()); + for(int k = 0; k < indices.size(); ++k) + permut.r(k) = (T)indices[k]; + + NDArray shifts(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + NDArray mus(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); + NDArray zhat(_m.ordering(), {size, 1}, _m.dataType(), _m.getContext()); calcSingVals(col0, diag, permut, singVals, shifts, mus); perturb(col0, diag, permut, singVals, shifts, mus, zhat); @@ -635,53 +613,39 @@ void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDA for(int i=0; i(i) > singVals.e(i+1)) { - T _e0 = singVals.e(i); - T _e1 = singVals.e(i+1); - //math::nd4j_swap(singVals(i),singVals(i+1)); - singVals.p(i, _e1); - singVals.p(i+1, _e0); + if(singVals.t(i) > singVals.t(i+1)) { - 
auto temp1 = U({0,0, i,i+1}, true); - auto temp2 = U({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + math::nd4j_swap(singVals.r(i), singVals.r(i+1)); + + auto temp1 = U({0,0, i,i+1}); + auto temp2 = U({0,0, i+1,i+2}); + temp1.swapUnsafe(temp2); if(_calcV) { - auto temp1 = V({0,0, i,i+1}, true); - auto temp2 = V({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + auto temp1 = V({0,0, i,i+1}); + auto temp2 = V({0,0, i+1,i+2}); + temp1.swapUnsafe(temp2); } } } - auto temp1 = singVals({0,curSize, 0,0}, true); - for (int e = 0; e < curSize / 2; ++e) { - T tmp = temp1.e(e); - temp1.p(e, temp1.e(curSize-1-e)); - temp1.p(curSize-1-e, tmp); - } + auto temp1 = singVals({0,curSize, 0,0}); + for (int e = 0; e < curSize / 2; ++e) + math::nd4j_swap(temp1.r(e), temp1.r(curSize-1-e)); auto temp2 = U({0,0, 0,curSize}, true); for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); + auto temp3 = temp2({0,0, i,i+1}); + auto temp4 = temp2({0,0, curSize-1-i,curSize-i}); + temp3.swapUnsafe(temp4); } if (_calcV) { auto temp2 = V({0,0, 0,curSize}, true); for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); + auto temp3 = temp2({0,0, i,i+1}); + auto temp4 = temp2({0,0, curSize-1-i,curSize-i}); + temp3.swapUnsafe(temp4); } } } @@ -695,54 +659,45 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif const int n = col2 - col1 + 1; const int k = n/2; const T almostZero = DataTypeUtils::min(); - T alphaK; - T betaK; - T r0; - T lambda, phi, c0, s0; - auto l = NDArrayFactory::create(_u.ordering(), {1, k}, _u.getContext()); - auto f = NDArrayFactory::create(_u.ordering(), {1, n-k-1}, 
_u.getContext()); + T alphaK, betaK, r0, lambda, phi, c0, s0; + + NDArray l(_u.ordering(), {1, k}, _u.dataType(), _u.getContext()); + NDArray f(_u.ordering(), {1, n-k-1}, _u.dataType(), _u.getContext()); if(n < _switchSize) { JacobiSVD jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV); - if (_calcU) { - auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true); - temp.assign(jac._u); - } + if (_calcU) + _u({col1,col1+n+1, col1,col1+n+1}, true).assign(jac._u); else { - auto temp1 = _u({0,1, col1,col1+n+1}, true); - temp1.assign(jac._u({0,1, 0,0}, true)); - auto temp2 = _u({1,2, col1,col1+n+1}, true); - temp2.assign(jac._u({n,n+1, 0,0}, true)); + _u({0,1, col1,col1+n+1}, true).assign(jac._u({0,1, 0,0}, true)); + _u({1,2, col1,col1+n+1}, true).assign(jac._u({n,n+1, 0,0}, true)); } - if (_calcV) { - auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true); - temp.assign(jac._v); - } + if (_calcV) + _v({row1W,row1W+n, col1W,col1W+n}, true).assign(jac._v); - auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true); - temp.assign(0.); + _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true).nullify(); auto diag = _m.diagonal('c'); diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true)); return; } - alphaK = _m.e(col1 + k, col1 + k); - betaK = _m.e(col1 + k + 1, col1 + k); + alphaK = _m.t(col1 + k, col1 + k); + betaK = _m.t(col1 + k + 1, col1 + k); DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift); DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1); if (_calcU) { - lambda = _u.e(col1 + k, col1 + k); - phi = _u.e(col1 + k + 1, col2 + 1); + lambda = _u.t(col1 + k, col1 + k); + phi = _u.t(col1 + k + 1, col2 + 1); } else { - lambda = _u.e(1, col1 + k); - phi = _u.e(0, col2 + 1); + lambda = _u.t(1, col1 + k); + phi = _u.t(0, col2 + 1); } r0 = math::nd4j_sqrt((math::nd4j_abs(alphaK * lambda) * math::nd4j_abs(alphaK * lambda)) + math::nd4j_abs(betaK * phi) * math::nd4j_abs(betaK * 
phi)); @@ -757,7 +712,7 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif } if (_calcV) - _v.p(row1W+k, col1W, 1.f); + _v.r(row1W+k, col1W) = (T)1; if (r0 < almostZero){ c0 = 1.; @@ -770,39 +725,37 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif if (_calcU) { - auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true); - NDArray q1(temp); + NDArray q1 = _u({col1,col1+k+1, col1+k,col1+k+1}, true).dup(); - for (int i = col1 + k - 1; i >= col1; --i) { - auto temp = _u({col1,col1+k+1, i+1,i+2}, true); - temp.assign(_u({col1, col1+k+1, i, i+1}, true)); - } + for (int i = col1 + k - 1; i >= col1; --i) + _u({col1,col1+k+1, i+1,i+2}, true).assign(_u({col1,col1+k+1, i,i+1}, true)); + + NDArray temp1 = _u({col1+k+1,col1+n+1, col2+1,col2+2}, true); _u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0); _u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0)); - _u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0); - _u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0; + _u({col1+k+1,col1+n+1, col1,col1+1}, true).assign(temp1 * s0); + temp1 *= c0; } else { - T q1 = _u.e(0, col1 + k); + T q1 = _u.t(0, col1 + k); for (int i = col1 + k - 1; i >= col1; --i) - _u.p(0, i+1, _u.e(0, i)); + _u.r(0, i+1) = _u.r(0, i); - _u.p(0, col1, q1 * c0); - _u.p(0, col2+1, -q1*s0); - _u.p(1, col1, _u.e(1, col2+1) * s0); - _u.p(1, col2 + 1, _u.e(1, col2 + 1) * c0); - _u({1,2, col1+1, col1+k+1}, true) = 0.f; - _u({0,1, col1+k+1, col1+n}, true) = 0.f; + _u.r(0, col1) = q1 * c0; + _u.r(0, col2+1) = -q1*s0; + _u.r(1, col1) = _u.t(1, col2+1) * s0; + _u.r(1, col2+1) = _u.t(1, col2+1) * c0; + _u({1,2, col1+1, col1+k+1}).nullify(); + _u({0,1, col1+k+1, col1+n}).nullify(); } - _m.p(col1 + shift, col1 + shift, r0); - auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true); - temp1.assign(l*alphaK); - auto temp2 = _m({col1+shift+k+1,col1+shift+n, 
col1+shift,col1+shift+1}, true); - temp2.assign(f*betaK); + _m.r(col1+shift, col1+shift) = r0; + + _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true).assign(l*alphaK); + _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true).assign(f*betaK); deflation(col1, col2, k, row1W, col1W, shift); @@ -810,26 +763,22 @@ void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shif calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD); if(_calcU) { - auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); + auto temp = _u({col1, col1+n+1, col1,col1+n+1}, true); + temp.assign(mmul(temp, UofSVD)); } else { - auto pTemp = _u({0,0, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); + auto temp = _u({0,0, col1,col1+n+1}, true); + temp.assign(mmul(temp, UofSVD)); } if (_calcV) { - auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, VofSVD)); + auto temp = _v({row1W,row1W+n, row1W,row1W+n}, true); + temp.assign(mmul(temp, VofSVD)); } auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true); - blockM = 0.f; - auto diag = blockM.diagonal('c'); - diag.assign(singVals); + blockM.nullify(); + blockM.diagonal('c').assign(singVals); } ////////////////////////////////////////////////////////////////////////// @@ -839,24 +788,22 @@ void SVD::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDAr if (_calcU) { int colsU = _fullUV ? 
hhU.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_u.ordering(), {hhU.rows(), colsU}, _u.getContext()); + NDArray temp1(_u.ordering(), {hhU.rows(), colsU}, _u.dataType(), _u.getContext()); temp1.setIdentity(); _u = temp1; - auto temp2 = _u({0,_diagSize, 0,_diagSize}, true); - temp2.assign(V({0,_diagSize, 0,_diagSize}, true)); + _u({0,_diagSize, 0,_diagSize}, true).assign(V({0,_diagSize, 0,_diagSize}, true)); const_cast(hhU).mulLeft(_u); } if (_calcV) { int colsV = _fullUV ? hhV.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_v.ordering(), {hhV.rows(), colsV}, _v.getContext()); + NDArray temp1(_v.ordering(), {hhV.rows(), colsV}, _v.dataType(), _v.getContext()); temp1.setIdentity(); _v = temp1; - auto temp2 = _v({0,_diagSize, 0,_diagSize}, true); - temp2.assign(U({0,_diagSize, 0,_diagSize}, true)); + _v({0,_diagSize, 0,_diagSize}, true).assign(U({0,_diagSize, 0,_diagSize}, true)); const_cast(hhV).mulLeft(_v); } } @@ -881,48 +828,40 @@ void SVD::evalData(const NDArray& matrix) { return; } - T scale = matrix.reduceNumber(reduce::AMax).e(0); + T scale = matrix.reduceNumber(reduce::AMax).t(0); if(scale == (T)0.) scale = 1.; - NDArray copy; - if(_transp) - copy = matrix.transpose(); - else - copy = matrix / scale; + BiDiagonalUp biDiag(_transp ? 
matrix.transpose() : matrix / scale); - BiDiagonalUp biDiag(copy); + _u.nullify(); + _v.nullify(); - _u = 0.; - _v = 0.; + _m({0,_diagSize, 0,0}, true).assign(biDiag._HHbidiag.transpose()); - auto temp1 = biDiag._HHbidiag.transpose(); - auto temp2 = _m({0,_diagSize, 0,0}, true); - temp2.assign(temp1); - - - auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true); - temp3.assign(0.); + _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}).nullify(); DivideAndConquer(0, _diagSize - 1, 0, 0, 0); for (int i = 0; i < _diagSize; ++i) { - T a = math::nd4j_abs(_m.e(i, i)); - _s.p(i, a * scale); + T a = math::nd4j_abs(_m.t(i, i)); + _s.r(i) = a * scale; if (a < almostZero) { - auto temp = _s({i+1,_diagSize, 0,0}, true); - temp.assign(0.); + _s({i+1,_diagSize, 0,0}).nullify(); break; } else if (i == _diagSize-1) break; } + HHsequence hhV = biDiag.makeHHsequence('v'); + HHsequence hhU = biDiag.makeHHsequence('u'); + if(_transp) - exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u); + exchangeUV(hhV, hhU, _v, _u); else - exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v); + exchangeUV(hhU, hhV, _u, _v); } diff --git a/libnd4j/include/helpers/hhSequence.h b/libnd4j/include/helpers/hhSequence.h index 31855a86c..1e1f8ecad 100644 --- a/libnd4j/include/helpers/hhSequence.h +++ b/libnd4j/include/helpers/hhSequence.h @@ -27,35 +27,35 @@ namespace sd { namespace ops { namespace helpers { - + class HHsequence { public: - + /* * matrix containing the Householder vectors */ - NDArray _vectors; + const NDArray& _vectors; /* * vector containing the Householder coefficients */ - NDArray _coeffs; - + const NDArray& _coeffs; + /* - * shift of the Householder sequence + * shift of the Householder sequence */ int _shift; /* * length of the Householder sequence */ - int _diagSize; + int _diagSize; - /* + /* * type of sequence, type = 'u' (acting on columns, left) or type = 'v' (acting on rows, right) */ - char _type; + char _type; /* * constructor @@ -64,18 
+64,18 @@ class HHsequence { /** * this method mathematically multiplies input matrix on Householder sequence from the left H0*H1*...Hn * matrix - * + * * matrix - input matrix to be multiplied */ template - void _mulLeft(NDArray& matrix); + void mulLeft_(NDArray& matrix); void mulLeft(NDArray& matrix); NDArray getTail(const int idx) const; template - void _applyTo(NDArray& dest); + void applyTo_(NDArray& dest); void applyTo(NDArray& dest); @@ -87,8 +87,8 @@ class HHsequence { ////////////////////////////////////////////////////////////////////////// FORCEINLINE int HHsequence::rows() const { - return _type == 'u' ? _vectors.sizeAt(0) : _vectors.sizeAt(1); -} + return _type == 'u' ? _vectors.sizeAt(0) : _vectors.sizeAt(1); +} diff --git a/libnd4j/include/helpers/householder.h b/libnd4j/include/helpers/householder.h index e71769901..7811fafa0 100644 --- a/libnd4j/include/helpers/householder.h +++ b/libnd4j/include/helpers/householder.h @@ -32,74 +32,74 @@ template class Householder { public: - + /** * this method calculates Householder matrix P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...] * w = u / u0 * u = x - |x|*e0 - * u0 = x0 - |x| + * u0 = x0 - |x| * e0 = [1, 0, 0 , 0, ...] - * + * * x - input vector, remains unaffected - */ - static NDArray evalHHmatrix(const NDArray& x); + */ + // static NDArray evalHHmatrix(const NDArray& x); /** * this method evaluates data required for calculation of Householder matrix P = identity_matrix - coeff * w * w^T * P * x = [normX, 0, 0 , 0, ...] - * coeff - scalar + * coeff - scalar * w = [1, w1, w2, w3, ...] * w = u / u0 * u = x - |x|*e0 - * u0 = x0 - |x| + * u0 = x0 - |x| * e0 = [1, 0, 0 , 0, ...] - * + * * x - input vector, remains unaffected * tail - the essential part of the vector w: [w1, w2, w3, ...] 
* normX - this scalar is the first non-zero element in vector resulting from Householder transformation -> (P*x) - * coeff - scalar, scaling factor in Householder matrix formula + * coeff - scalar, scaling factor in Householder matrix formula */ static void evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX); - static void evalHHmatrixDataI(const NDArray& x, T& coeff, T& normX); + static void evalHHmatrixDataI(NDArray& x, T& coeff, T& normX); // in-place, x to be affected /** * this method mathematically multiplies input matrix on Householder from the left P * matrix - * + * * matrix - input matrix * tail - the essential part of the Householder vector w: [w1, w2, w3, ...] - * coeff - scalar, scaling factor in Householder matrix formula + * coeff - scalar, scaling factor in Householder matrix formula */ static void mulLeft(NDArray& matrix, const NDArray& tail, const T coeff); /** * this method mathematically multiplies input matrix on Householder from the right matrix * P - * + * * matrix - input matrix * tail - the essential part of the Householder vector w: [w1, w2, w3, ...] 
- * coeff - scalar, scaling factor in Householder matrix formula - */ + * coeff - scalar, scaling factor in Householder matrix formula + */ static void mulRight(NDArray& matrix, const NDArray& tail, const T coeff); - + }; - + // /** // * this function reduce given matrix to upper bidiagonal form (in-place operation), matrix must satisfy following condition rows >= cols - // * - // * matrix - input 2D matrix to be reduced to upper bidiagonal from + // * + // * matrix - input 2D matrix to be reduced to upper bidiagonal from // */ // template // void biDiagonalizeUp(NDArray& matrix); - // /** + // /** // * given a matrix [m,n], this function computes its singular value decomposition matrix = u * s * v^T - // * + // * // * matrix - input 2D matrix to decompose, [m, n] // * u - unitary matrix containing left singular vectors of input matrix, [m, m] // * s - diagonal matrix with singular values of input matrix (non-negative) on the diagonal sorted in decreasing order, @@ -109,7 +109,7 @@ class Householder { // * fullUV - if false then only p (p is smaller among m and n) first columns of u and v will be calculated and their dimensions in this case are [m, p] and [n, p] // * // */ - // void svd(const NDArray& matrix, NDArray& u, NDArray& s, NDArray& v, const bool calcUV = false, const bool fullUV = false) + // void svd(const NDArray& matrix, NDArray& u, NDArray& s, NDArray& v, const bool calcUV = false, const bool fullUV = false) diff --git a/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp b/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp new file mode 100644 index 000000000..6eeb0c28b --- /dev/null +++ b/libnd4j/include/helpers/impl/EigenValsAndVecs.cpp @@ -0,0 +1,293 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +EigenValsAndVecs::EigenValsAndVecs(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::EigenValsAndVecs constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::EigenValsAndVecs constructor: input array must be 2D square matrix !"); + + Schur schur(matrix); + + NDArray& schurMatrixU = schur._U; + NDArray& schurMatrixT = schur._T; + + _Vecs = NDArray(matrix.ordering(), {schurMatrixU.sizeAt(1), schurMatrixU.sizeAt(1), 2}, matrix.dataType(), matrix.getContext()); + _Vals = NDArray(matrix.ordering(), {matrix.sizeAt(1), 2}, matrix.dataType(), matrix.getContext()); + + // sequence of methods calls matters + calcEigenVals(schurMatrixT); + calcPseudoEigenVecs(schurMatrixT, schurMatrixU); // pseudo-eigenvectors are real and will be stored in schurMatrixU + calcEigenVecs(schurMatrixU); +} + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcEigenVals(const NDArray& schurMatrixT) { + + const int numOfCols = schurMatrixT.sizeAt(1); + + // 
calculate eigenvalues _Vals + int i = 0; + while (i < numOfCols) { + + if (i == numOfCols - 1 || schurMatrixT.t(i+1, i) == T(0.f)) { + + _Vals.r(i, 0) = schurMatrixT.t(i, i); // real part + _Vals.r(i, 1) = T(0); // imaginary part + + if(!math::nd4j_isfin(_Vals.t(i, 0))) { + throw std::runtime_error("ops::helpers::igenValsAndVec::calcEigenVals: got infinite eigen value !"); + return; + } + + ++i; + } + else { + + T p = T(0.5) * (schurMatrixT.t(i, i) - schurMatrixT.t(i+1, i+1)); + T z; + { + T t0 = schurMatrixT.t(i+1, i); + T t1 = schurMatrixT.t(i, i+1); + T maxval = math::nd4j_max(math::nd4j_abs(p), math::nd4j_max(math::nd4j_abs(t0), math::nd4j_abs(t1))); + t0 /= maxval; + t1 /= maxval; + T p0 = p / maxval; + z = maxval * math::nd4j_sqrt(math::nd4j_abs(p0 * p0 + t0 * t1)); + } + + _Vals.r(i, 0) = _Vals.r(i+1, 0) = schurMatrixT.t(i+1, i+1) + p; + _Vals.r(i, 1) = z; + _Vals.r(i+1,1) = -z; + + if(!(math::nd4j_isfin(_Vals.t(i,0)) && math::nd4j_isfin(_Vals.t(i+1,0)) && math::nd4j_isfin(_Vals.t(i,1))) && math::nd4j_isfin(_Vals.t(i+1,1))) { + throw std::runtime_error("ops::helpers::igenValsAndVec::calcEigenVals: got infinite eigen value !"); + return; + } + + i += 2; + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcPseudoEigenVecs(NDArray& schurMatrixT, NDArray& schurMatrixU) { + + const int numOfCols = schurMatrixU.sizeAt(1); + + T norm = 0; + for (int j = 0; j < numOfCols; ++j) + norm += schurMatrixT({j,j+1, math::nd4j_max(j-1, 0),numOfCols}).reduceNumber(reduce::ASum).template t(0); + + if (norm == T(0)) + return; + + for (int n = numOfCols-1; n >= 0; n--) { + + T p = _Vals.t(n, 0); // real part + T q = _Vals.t(n, 1); // imaginary part + + if(q == (T)0) { // not complex + + T lastr((T)0), lastw((T)0); + int l = n; + + schurMatrixT.r(n, n) = T(1); + + for (int i = n-1; i >= 0; i--) { + + T w = schurMatrixT.t(i,i) - p; + T r = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n,n+1}, 
true)).template t(0); // dot + + if (_Vals.t(i, 1) < T(0)) { + lastw = w; + lastr = r; + } + else { + + l = i; + if (_Vals.t(i, 1) == T(0)) { + + if (w != T(0)) + schurMatrixT.r(i, n) = -r / w; + else + schurMatrixT.r(i, n) = -r / (DataTypeUtils::eps() * norm); + } + else { + + T x = schurMatrixT.t(i, i+1); + T y = schurMatrixT.t(i+1, i); + T denom = (_Vals.t(i, 0) - p) * (_Vals.t(i, 0) - p) + _Vals.t(i, 1) * _Vals.t(i, 1); + T t = (x * lastr - lastw * r) / denom; + schurMatrixT.r(i, n) = t; + + if (math::nd4j_abs(x) > math::nd4j_abs(lastw)) + schurMatrixT.r(i+1, n) = (-r - w * t) / x; + else + schurMatrixT.r(i+1, n) = (-lastr - y * t) / lastw; + } + + + T t = math::nd4j_abs(schurMatrixT.t(i, n)); + if((DataTypeUtils::eps() * t) * t > T(1)) + schurMatrixT({schurMatrixT.sizeAt(0)-numOfCols+i,-1, n,n+1}) /= t; + } + } + } + else if(q < T(0) && n > 0) { // complex + + T lastra(0), lastsa(0), lastw(0); + int l = n - 1; + + if(math::nd4j_abs(schurMatrixT.t(n, n-1)) > math::nd4j_abs(schurMatrixT.t(n-1, n))) { + + schurMatrixT.r(n-1, n-1) = q / schurMatrixT.t(n, n-1); + schurMatrixT.r(n-1, n) = -(schurMatrixT.t(n, n) - p) / schurMatrixT.t(n, n-1); + } + else { + divideComplexNums(T(0),-schurMatrixT.t(n-1,n), schurMatrixT.t(n-1,n-1)-p,q, schurMatrixT.r(n-1,n-1),schurMatrixT.r(n-1,n)); + } + + schurMatrixT.r(n,n-1) = T(0); + schurMatrixT.r(n,n) = T(1); + + for (int i = n-2; i >= 0; i--) { + + T ra = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n-1,n}, true)).template t(0); // dot + T sa = mmul(schurMatrixT({i,i+1, l,n+1}, true), schurMatrixT({l,n+1, n,n+1}, true)).template t(0); // dot + + T w = schurMatrixT.t(i,i) - p; + + if (_Vals.t(i, 1) < T(0)) { + lastw = w; + lastra = ra; + lastsa = sa; + } + else { + + l = i; + + if (_Vals.t(i, 1) == T(0)) { + divideComplexNums(-ra,-sa, w,q, schurMatrixT.r(i,n-1),schurMatrixT.r(i,n)); + } + else { + + T x = schurMatrixT.t(i,i+1); + T y = schurMatrixT.t(i+1,i); + T vr = (_Vals.t(i, 0) - p) * (_Vals.t(i, 0) - p) + 
_Vals.t(i, 1) * _Vals.t(i, 1) - q * q; + T vi = (_Vals.t(i, 0) - p) * T(2) * q; + + if ((vr == T(0)) && (vi == T(0))) + vr = DataTypeUtils::eps() * norm * (math::nd4j_abs(w) + math::nd4j_abs(q) + math::nd4j_abs(x) + math::nd4j_abs(y) + math::nd4j_abs(lastw)); + + divideComplexNums(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra, vr,vi, schurMatrixT.r(i,n-1),schurMatrixT.r(i,n)); + + if(math::nd4j_abs(x) > (math::nd4j_abs(lastw) + math::nd4j_abs(q))) { + + schurMatrixT.r(i+1,n-1) = (-ra - w * schurMatrixT.t(i,n-1) + q * schurMatrixT.t(i,n)) / x; + schurMatrixT.r(i+1,n) = (-sa - w * schurMatrixT.t(i,n) - q * schurMatrixT.t(i,n-1)) / x; + } + else + divideComplexNums(-lastra-y*schurMatrixT.t(i,n-1),-lastsa-y*schurMatrixT.t(i,n), lastw,q, schurMatrixT.r(i+1,n-1),schurMatrixT.r(i+1,n)); + } + + T t = math::nd4j_max(math::nd4j_abs(schurMatrixT.t(i, n-1)), math::nd4j_abs(schurMatrixT.t(i,n))); + if ((DataTypeUtils::eps() * t) * t > T(1)) + schurMatrixT({i,numOfCols, n-1,n+1}) /= t; + } + } + n--; + } + else + throw std::runtime_error("ops::helpers::EigenValsAndVecs::calcEigenVecs: internal bug !"); + } + + for (int j = numOfCols-1; j >= 0; j--) + schurMatrixU({0,0, j,j+1}, true).assign( mmul(schurMatrixU({0,0, 0,j+1}, true), schurMatrixT({0,j+1, j,j+1}, true)) ); +} + + +////////////////////////////////////////////////////////////////////////// +template +void EigenValsAndVecs::calcEigenVecs(const NDArray& schurMatrixU) { + + const T precision = T(2) * DataTypeUtils::eps(); + + const int numOfCols = schurMatrixU.sizeAt(1); + + for (int j = 0; j < numOfCols; ++j) { + + if(math::nd4j_abs(_Vals.t(j, 1)) <= math::nd4j_abs(_Vals.t(j, 0)) * precision || j+1 == numOfCols) { // real + + _Vecs.syncToDevice(); + _Vecs({0,0, j,j+1, 0,1}).assign(schurMatrixU({0,0, j,j+1})); + _Vecs({0,0, j,j+1, 1,2}) = (T)0; + + // normalize + const T norm2 = _Vecs({0,0, j,j+1, 0,1}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j,j+1, 0,1}) /= 
math::nd4j_sqrt(norm2); + } + else { // complex + + for (int i = 0; i < numOfCols; ++i) { + _Vecs.r(i, j, 0) = _Vecs.r(i, j+1, 0) = schurMatrixU.t(i, j); + _Vecs.r(i, j, 1) = schurMatrixU.t(i, j+1); + _Vecs.r(i, j+1, 1) = -schurMatrixU.t(i, j+1); + } + + // normalize + T norm2 = _Vecs({0,0, j,j+1, 0,0}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j,j+1, 0,0}) /= math::nd4j_sqrt(norm2); + + // normalize + norm2 = _Vecs({0,0, j+1,j+2, 0,0}).reduceNumber(reduce::SquaredNorm).template t(0); + if(norm2 > (T)0) + _Vecs({0,0, j+1,j+2, 0,0}) /= math::nd4j_sqrt(norm2); + + ++j; + } + } +} + + +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; +template class ND4J_EXPORT EigenValsAndVecs; + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/FullPivLU.cpp b/libnd4j/include/helpers/impl/FullPivLU.cpp new file mode 100644 index 000000000..efb7571ed --- /dev/null +++ b/libnd4j/include/helpers/impl/FullPivLU.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +// A{M,K} * x{K,N} = b{M,N} +template +void FullPivLU::solve(const NDArray& A, const NDArray& b, NDArray& x) { + + if(A.rankOf() != 2) + throw std::runtime_error("FullPivLU::solve: input matrix A must be 2D !"); + + if(A.sizeAt(0) != b.sizeAt(0)) + throw std::runtime_error("FullPivLU::solve: A and b must have the same number of rows !"); + + if(A.sizeAt(1) != x.sizeAt(0)) + throw std::runtime_error("FullPivLU::solve: number of A columns must be equal to number of x rows !"); + + NDArray LU = A.dup(); + + const int rows = LU.sizeAt(0); + const int cols = LU.sizeAt(1); + const int diagLen = math::nd4j_min(rows, cols); + + std::vector rowsInds(rows), colsInds(cols); + + int numOfTranspos = 0; + int nonZeroPivots1 = diagLen; + + T maxPivot = T(0); + + for(int k = 0; k < diagLen; ++k) { + + NDArray bottomRightCorner = LU({k,rows, k,cols}, true); + const int indPivot = static_cast(bottomRightCorner.indexReduceNumber(indexreduce::IndexAbsoluteMax).t(0)); + + int colPivot = indPivot % (cols-k); + int rowPivot = indPivot / (cols-k); + + T currentMax = math::nd4j_abs(bottomRightCorner.t(rowPivot, colPivot)); + + // take into account that this was calculated in corner, not in whole LU + rowPivot += k; + colPivot += k; + + if(currentMax == T(0)) { + + nonZeroPivots1 = k; + + for(int i = k; i < diagLen; ++i) + rowsInds[i] = colsInds[i] = i; + + break; + } + + if(currentMax > maxPivot) + maxPivot = currentMax; + + rowsInds[k] = rowPivot; + colsInds[k] = colPivot; + + if(k != rowPivot) { + NDArray row1 = LU({k,k+1, 0,0}, true); + NDArray row2 = LU({rowPivot,rowPivot+1, 0,0}, true); + row1.swapUnsafe(row2); + ++numOfTranspos; + } + 
if(k != colPivot) { + NDArray col1 = LU({0,0, k,k+1}, true); + NDArray col2 = LU({0,0, colPivot,colPivot+1}, true); + col1.swapUnsafe(col2); + ++numOfTranspos; + } + + if(k < rows-1) + LU({k+1,rows, k,k+1}, true) /= LU.t(k, k); + + if(k < diagLen-1) + LU({k+1,rows, k+1,cols},true) -= mmul(LU({k+1,rows, k,k+1},true), LU({k,k+1, k+1,cols},true)); + } + + //***************************************************// + + const T threshold = maxPivot * DataTypeUtils::eps() * (T)diagLen; + + int nonZeroPivots2 = 0; + for(int i = 0; i < nonZeroPivots1; ++i) + nonZeroPivots2 += static_cast(math::nd4j_abs(LU.t(i,i)) > threshold); + + if(nonZeroPivots2 == 0) { + x.nullify(); + return; + } + + //***************************************************// + + std::vector rowsPermut1(rows), rowsPermut2(rows), colsPermut(cols); + std::iota(rowsPermut1.begin(), rowsPermut1.end(), 0); + std::iota(colsPermut.begin(), colsPermut.end(), 0); + + for(int k = diagLen-1; k >= 0; --k) + math::nd4j_swap(rowsPermut1[k], rowsPermut1[rowsInds[k]]); + + for(int k = 0; k < diagLen; ++k) + math::nd4j_swap(colsPermut[k], colsPermut[colsInds[k]]); + + for(int i = 0; i < rows; ++i) + for(int j = 0; j < rows; ++j) + if(i == rowsPermut1[j]) { rowsPermut2[i] = j; break; } + + //***************************************************// + + NDArray c = b.ulike(); + + for (int i = 0; i < rows; ++i) + c({i,i+1, 0,0}, true).assign(b({rowsPermut2[i],rowsPermut2[i]+1, 0,0}, true)); + + + NDArray cTopRows1 = c({0,diagLen, 0,0}, true); + // TriangularSolver::solve(LU({0,diagLen, 0,diagLen}, true), cTopRows1, true, true, cTopRows1); + ops::helpers::triangularSolve2D(nullptr, LU({0,diagLen, 0,diagLen}, true), cTopRows1,true,true, cTopRows1); + + if(rows > cols) + c({cols,-1, 0,0}, true) -= mmul(LU({cols,-1, 0,0},true), c({0,cols, 0,0}, true)); + + NDArray cTopRows2 = c({0,nonZeroPivots2, 0,0}, true); + // TriangularSolver::solve(LU({0,nonZeroPivots2, 0,nonZeroPivots2}, true), cTopRows2, false, false, cTopRows2); + 
ops::helpers::triangularSolve2D(nullptr, LU({0,nonZeroPivots2, 0,nonZeroPivots2}, true),cTopRows2,false,false, cTopRows2); + + for(int i = 0; i < nonZeroPivots2; ++i) + x({colsPermut[i],colsPermut[i]+1, 0,0}, true).assign(c({i,i+1, 0,0}, true)); + + for(int i = nonZeroPivots2; i < cols; ++i) + x({colsPermut[i],colsPermut[i]+1, 0,0}, true).nullify(); +} + +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; +template class ND4J_EXPORT FullPivLU; + +} +} +} diff --git a/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp b/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp new file mode 100644 index 000000000..31495cab9 --- /dev/null +++ b/libnd4j/include/helpers/impl/HessenbergAndSchur.cpp @@ -0,0 +1,383 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +template +Hessenberg::Hessenberg(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::Hessenberg constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) == 1) { + _Q = NDArray(matrix.ordering(), {1,1}, matrix.dataType(), matrix.getContext()); + _Q = 1; + _H = matrix.dup(); + return; + } + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::Hessenberg constructor: input array must be 2D square matrix !"); + + _H = matrix.dup(); + _Q = matrix.ulike(); + + evalData(); +} + +////////////////////////////////////////////////////////////////////////// +template +void Hessenberg::evalData() { + + const int rows = _H.sizeAt(0); + + NDArray hhCoeffs(_H.ordering(), {rows - 1}, _H.dataType(), _H.getContext()); + + // calculate _H + for(uint i = 0; i < rows - 1; ++i) { + + T coeff, norm; + + NDArray tail1 = _H({i+1,-1, i,i+1}); + NDArray tail2 = _H({i+2,-1, i,i+1}, true); + + Householder::evalHHmatrixDataI(tail1, coeff, norm); + + _H({0,0, i,i+1}). template r(i+1) = norm; + hhCoeffs. 
template r(i) = coeff; + + NDArray bottomRightCorner = _H({i+1,-1, i+1,-1}, true); + Householder::mulLeft(bottomRightCorner, tail2, coeff); + + NDArray rightCols = _H({0,0, i+1,-1}, true); + Householder::mulRight(rightCols, tail2.transpose(), coeff); + } + + // calculate _Q + HHsequence hhSeq(_H, hhCoeffs, 'u'); + hhSeq._diagSize = rows - 1; + hhSeq._shift = 1; + hhSeq.applyTo_(_Q); + + // fill down with zeros starting at first subdiagonal + _H.fillAsTriangular(0, -1, 0, _H, 'l'); +} + +////////////////////////////////////////////////////////////////////////// +template +Schur::Schur(const NDArray& matrix) { + + if(matrix.rankOf() != 2) + throw std::runtime_error("ops::helpers::Schur constructor: input matrix must be 2D !"); + + if(matrix.sizeAt(0) != matrix.sizeAt(1)) + throw std::runtime_error("ops::helpers::Schur constructor: input array must be 2D square matrix !"); + + evalData(matrix); +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::evalData(const NDArray& matrix) { + + const T scale = matrix.reduceNumber(reduce::AMax).template t(0); + + const T almostZero = DataTypeUtils::min(); + + if(scale < DataTypeUtils::min()) { + + _T = matrix.ulike(); + _U = matrix.ulike(); + + _T.nullify(); + _U.setIdentity(); + + return; + } + + // perform Hessenberg decomposition + Hessenberg hess(matrix / scale); + + _T = std::move(hess._H); + _U = std::move(hess._Q); + + calcFromHessenberg(); + + _T *= scale; +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::splitTwoRows(const int ind, const T shift) { + + const int numCols = _T.sizeAt(1); + + T p = (T)0.5 * (_T.t(ind-1, ind-1) - _T.t(ind, ind)); + + T q = p*p + _T.t(ind, ind-1) * _T.t(ind-1, ind); + + _T.r(ind, ind) += shift; + _T.r(ind-1, ind-1) += shift; + + if (q >= (T)0) { + + T z = math::nd4j_sqrt(math::nd4j_abs(q)); + + NDArray rotation(_T.ordering(), {2, 2}, _T.dataType(), _T.getContext()); + + if (p >= (T)0) + 
JacobiSVD::createJacobiRotationGivens(p+z, _T.t(ind, ind-1), rotation); + else + JacobiSVD::createJacobiRotationGivens(p-z, _T.t(ind, ind-1), rotation); + + NDArray rightCols = _T({0,0, ind-1,-1}); + JacobiSVD::mulRotationOnLeft(ind-1, ind, rightCols, rotation.transpose()); + + NDArray topRows = _T({0,ind+1, 0,0}); + JacobiSVD::mulRotationOnRight(ind-1, ind, topRows, rotation); + + JacobiSVD::mulRotationOnRight(ind-1, ind, _U, rotation); + + _T.r(ind, ind-1) = (T)0; + } + + if (ind > 1) + _T.r(ind-1, ind-2) = (T)0; +} + + +////////////////////////////////////////////////////////////////////////// +template +void Schur::calcShift(const int ind, const int iter, T& shift, NDArray& shiftVec) { + + // shiftVec has length = 3 + + shiftVec.r(0) = _T.t(ind, ind); + shiftVec.r(1) = _T.t(ind-1, ind-1); + shiftVec.r(2) = _T.t(ind, ind-1) * _T.t(ind-1, ind); + + if (iter == 10) { + shift += shiftVec.t(0); + + for (int i = 0; i <= ind; ++i) + _T.r(i,i) -= shiftVec.t(0); + + T s = math::nd4j_abs(_T.t(ind, ind-1)) + math::nd4j_abs(_T.t(ind-1, ind-2)); + + shiftVec.r(0) = T(0.75) * s; + shiftVec.r(1) = T(0.75) * s; + shiftVec.r(2) = T(-0.4375) * s*s; + } + + if (iter == 30) { + + T s = (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); + s = s*s + shiftVec.t(2); + + if (s > T(0)) { + + s = math::nd4j_sqrt(s); + + if (shiftVec.t(1) < shiftVec.t(0)) + s = -s; + + s = s + (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); + s = shiftVec.t(0) - shiftVec.t(2) / s; + shift += s; + + for (int i = 0; i <= ind; ++i) + _T.r(i,i) -= s; + + shiftVec = T(0.964); + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec) { + + // shiftVec has length = 3 + + for (ind3 = ind2-2; ind3 >= ind1; --ind3) { + + const T mm = _T.t(ind3, ind3); + const T r = shiftVec.t(0) - mm; + const T s = shiftVec.t(1) - mm; + + householderVec.r(0) = (r * s - shiftVec.t(2)) / 
_T.t(ind3+1, ind3) + _T.t(ind3, ind3+1); + householderVec.r(1) = _T.t(ind3+1, ind3+1) - mm - r - s; + householderVec.r(2) = _T.t(ind3+2, ind3+1); + + if (ind3 == ind1) + break; + + const T lhs = _T.t(ind3,ind3-1) * (math::nd4j_abs(householderVec.t(1)) + math::nd4j_abs(householderVec.t(2))); + const T rhs = householderVec.t(0) * (math::nd4j_abs(_T.t(ind3-1, ind3-1)) + math::nd4j_abs(mm) + math::nd4j_abs(_T.t(ind3+1, ind3+1))); + + if(math::nd4j_abs(lhs) < DataTypeUtils::eps() * rhs) + break; + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec) { + + if(!(ind2 >= ind1)) + throw std::runtime_error("ops::helpers::Schur:doFrancisQR: wrong input indexes, condition ind2 >= ind1 must be true !"); + if(!(ind2 <= ind3-2)) + throw std::runtime_error("ops::helpers::Schur:doFrancisQR: wrong input indexes, condition iind2 <= ind3-2 must be true !"); + + const int numCols = _T.sizeAt(1); + + for (int k = ind2; k <= ind3-2; ++k) { + + const bool firstIter = (k == ind2); + + T coeff, normX; + NDArray tail(_T.ordering(), {2, 1}, _T.dataType(), _T.getContext()); + Householder::evalHHmatrixData(firstIter ? 
householderVec : _T({k,k+3, k-1,k}), tail, coeff, normX); + + if (normX != T(0)) { + + if (firstIter && k > ind1) + _T.r(k, k-1) = -_T.t(k, k-1); + else if (!firstIter) + _T.r(k, k-1) = normX; + + NDArray block1 = _T({k,k+3, k,numCols}, true); + Householder::mulLeft(block1, tail, coeff); + + NDArray block2 = _T({0,math::nd4j_min(ind3,k+3)+1, k,k+3}, true); + Householder::mulRight(block2, tail, coeff); + + NDArray block3 = _U({0,numCols, k,k+3}, true); + Householder::mulRight(block3, tail, coeff); + } + } + + T coeff, normX; + NDArray tail(_T.ordering(), {1, 1}, _T.dataType(), _T.getContext()); + Householder::evalHHmatrixData(_T({ind3-1,ind3+1, ind3-2,ind3-1}), tail, coeff, normX); + + if (normX != T(0)) { + + _T.r(ind3-1, ind3-2) = normX; + + NDArray block1 = _T({ind3-1,ind3+1, ind3-1,numCols}, true); + Householder::mulLeft(block1, tail, coeff); + + NDArray block2 = _T({0,ind3+1, ind3-1,ind3+1}, true); + Householder::mulRight(block2, tail, coeff); + + NDArray block3 = _U({0,numCols, ind3-1,ind3+1}, true); + Householder::mulRight(block3, tail, coeff); + } + + for (int i = ind2+2; i <= ind3; ++i) { + _T.r(i, i-2) = T(0); + if (i > ind2+2) + _T.r(i, i-3) = T(0); + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Schur::calcFromHessenberg() { + + const int maxIters = _maxItersPerRow * _T.sizeAt(0); + + const int numCols = _T.sizeAt(1); + int iu = numCols - 1; + int iter = 0; + int totalIter = 0; + + T shift = T(0); + + T norm = 0; + for (int j = 0; j < numCols; ++j) + norm += _T({0,math::nd4j_min(numCols,j+2), j,j+1}).reduceNumber(reduce::ASum).template t(0); + + if(norm != T(0)) { + + while (iu >= 0) { + + const int il = getSmallSubdiagEntry(iu); + + if (il == iu) { + + _T.r(iu,iu) = _T.t(iu,iu) + shift; + if (iu > 0) + _T.r(iu, iu-1) = T(0); + iu--; + iter = 0; + + } + else if (il == iu-1) { + + splitTwoRows(iu, shift); + iu -= 2; + iter = 0; + } + else { + + NDArray householderVec(_T.ordering(), {3}, _T.dataType(), 
_T.getContext()); + NDArray shiftVec (_T.ordering(), {3}, _T.dataType(), _T.getContext()); + + calcShift(iu, iter, shift, shiftVec); + + ++iter; + ++totalIter; + + if (totalIter > maxIters) + break; + + int im; + initFrancisQR(il, iu, shiftVec, im, householderVec); + doFrancisQR(il, im, iu, householderVec); + } + } + } +} + +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; +template class ND4J_EXPORT Hessenberg; + +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; +template class ND4J_EXPORT Schur; + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/MmulHelper.cpp b/libnd4j/include/helpers/impl/MmulHelper.cpp index 8e37fd530..ba86bb1b5 100644 --- a/libnd4j/include/helpers/impl/MmulHelper.cpp +++ b/libnd4j/include/helpers/impl/MmulHelper.cpp @@ -207,7 +207,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND const bool isBVector = shape::isCommonVector(B->shapeInfo(), lenDim); // dot product of 2 vectors - if(isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1*4) or (4x1 * 4x1) or (4x1 * 4) + if(A->lengthOf() == B->lengthOf() && isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1*4) or (4x1 * 4x1) or (4x1 * 4) return dot(A, B, C, alpha, beta); // matrix x matrix diff --git a/libnd4j/include/helpers/impl/Sqrtm.cpp b/libnd4j/include/helpers/impl/Sqrtm.cpp new file mode 100644 index 000000000..5fe45656f --- /dev/null +++ b/libnd4j/include/helpers/impl/Sqrtm.cpp @@ -0,0 +1,276 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#include +#include +#include +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtmQuasiTrianDiag(const NDArray& matrixT, NDArray& sqrtT ) { + + const int rows = matrixT.sizeAt(0); + + for(int i = 0; i < rows; i++) { + + if (i == rows - 1 || matrixT.t(i+1, i) == (T)0) { + const auto elemT = matrixT.t(i, i); + if(elemT < (T)0) + throw std::runtime_error("ops::helpers::Sqrtm::sqrtmQuasiTrianDiag: can't take sqrt of negative diagonal element of T matrix !"); + sqrtT.r(i,i) = math::nd4j_sqrt(elemT); + } + else { + + EigenValsAndVecs es(matrixT({i,i+2, i,i+2}, true)); // es._Vecs {2,2,2}, es._Vals{2,2} + + const NDArray& vecs = es._Vecs; + const NDArray& vals = es._Vals; + + const T& vecsReal00 = vecs.t(0,0,0); + const T& vecsImag00 = vecs.t(0,0,1); + const T& vecsReal01 = vecs.t(0,1,0); + const T& vecsImag01 = vecs.t(0,1,1); + const T& vecsReal10 = vecs.t(1,0,0); + const T& vecsImag10 = vecs.t(1,0,1); + const T& vecsReal11 = vecs.t(1,1,0); + const T& vecsImag11 = vecs.t(1,1,1); + + // es.eigenvalues().cwiseSqrt().asDiagonal() + T eigenValsSqrt[2][2]; + eigenValsSqrt[0][0] = vals.t(0,0); + eigenValsSqrt[0][1] = vals.t(0,1); + 
eigenValsSqrt[1][0] = vals.t(1,0); + eigenValsSqrt[1][1] = vals.t(1,1); + EigenValsAndVecs::sqrtComplexNum(eigenValsSqrt[0][0], eigenValsSqrt[0][1]); + EigenValsAndVecs::sqrtComplexNum(eigenValsSqrt[1][0], eigenValsSqrt[1][1]); + + // es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() + T vecsElem[2][2][2]; + EigenValsAndVecs::multiplyComplexNums(vecsReal00,vecsImag00, eigenValsSqrt[0][0],eigenValsSqrt[0][1], vecsElem[0][0][0],vecsElem[0][0][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal01,vecsImag01, eigenValsSqrt[1][0],eigenValsSqrt[1][1], vecsElem[0][1][0],vecsElem[0][1][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal10,vecsImag10, eigenValsSqrt[0][0],eigenValsSqrt[0][1], vecsElem[1][0][0],vecsElem[1][0][1]); + EigenValsAndVecs::multiplyComplexNums(vecsReal11,vecsImag11, eigenValsSqrt[1][0],eigenValsSqrt[1][1], vecsElem[1][1][0],vecsElem[1][1][1]); + + // es.eigenvectors().inverse() + T vecsElemInv[2][2][2]; + + T tempReal, tempImag, divisorReal, divisorImag; + EigenValsAndVecs::multiplyComplexNums(vecsReal00,vecsImag00, vecsReal11,vecsImag11, divisorReal,divisorImag); + EigenValsAndVecs::multiplyComplexNums(vecsReal01,vecsImag01, vecsReal10,vecsImag10, tempReal,tempImag); + divisorReal -= tempReal; + divisorImag -= tempImag; + + EigenValsAndVecs::divideComplexNums(vecsReal11,vecsImag11, divisorReal,divisorImag, vecsElemInv[0][0][0],vecsElemInv[0][0][1]); + EigenValsAndVecs::divideComplexNums(-vecsReal01,-vecsImag01, divisorReal,divisorImag, vecsElemInv[0][1][0],vecsElemInv[0][1][1]); + EigenValsAndVecs::divideComplexNums(-vecsReal10,-vecsImag10, divisorReal,divisorImag, vecsElemInv[1][0][0],vecsElemInv[1][0][1]); + EigenValsAndVecs::divideComplexNums(vecsReal00,vecsImag00, divisorReal,divisorImag, vecsElemInv[1][1][0],vecsElemInv[1][1][1]); + + // result + T result[2][2][2]; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][0][0],vecsElem[0][0][1], vecsElemInv[0][0][0],vecsElemInv[0][0][1], tempReal,tempImag); + 
EigenValsAndVecs::multiplyComplexNums(vecsElem[0][1][0],vecsElem[0][1][1], vecsElemInv[1][0][0],vecsElemInv[1][0][1], result[0][0][0],result[0][0][1]); + result[0][0][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][0][0],vecsElem[0][0][1], vecsElemInv[0][1][0],vecsElemInv[0][1][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[0][1][0],vecsElem[0][1][1], vecsElemInv[1][1][0],vecsElemInv[1][1][1], result[0][1][0],result[0][1][1]); + result[0][1][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][0][0],vecsElem[1][0][1], vecsElemInv[0][0][0],vecsElemInv[0][0][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][1][0],vecsElem[1][1][1], vecsElemInv[1][0][0],vecsElemInv[1][0][1], result[1][0][0],result[1][0][1]); + result[1][0][0] += tempReal; + + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][0][0],vecsElem[1][0][1], vecsElemInv[0][1][0],vecsElemInv[0][1][1], tempReal,tempImag); + EigenValsAndVecs::multiplyComplexNums(vecsElem[1][1][0],vecsElem[1][1][1], vecsElemInv[1][1][0],vecsElemInv[1][1][1], result[1][1][0],result[1][1][1]); + result[1][1][0] += tempReal; + + sqrtT.r(i,i) = result[0][0][0]; + sqrtT.r(i,i+1) = result[0][1][0]; + sqrtT.r(i+1,i) = result[1][0][0]; + sqrtT.r(i+1,i+1) = result[1][1][0]; + + ++i; + } + } +} + +////////////////////////////////////////////////////////////////////////// +// all matrices are {2,2} here +template +static void sqrtmQuasiTrianAuxEq(const NDArray& A, const NDArray& B, const NDArray& C, NDArray& X) { + + NDArray tempMatrix(A.ordering(), {4,4}, A.dataType(), A.getContext()); + + tempMatrix.r(0,0) = A.t(0,0) + B.t(0,0); + tempMatrix.r(1,1) = A.t(0,0) + B.t(1,1); + tempMatrix.r(2,2) = A.t(1,1) + B.t(0,0); + tempMatrix.r(3,3) = A.t(1,1) + B.t(1,1); + tempMatrix.r(0,1) = B.t(1,0); + tempMatrix.r(0,2) = A.t(0,1); + tempMatrix.r(1,0) = B.t(0,1); + tempMatrix.r(1,3) = A.t(0,1); + tempMatrix.r(2,0) = A.t(1,0); + tempMatrix.r(2,3) = B.t(1,0); + 
tempMatrix.r(3,1) = A.t(1,0); + tempMatrix.r(3,2) = B.t(0,1); + tempMatrix.r(0,3) = (T)0; + tempMatrix.r(1,2) = (T)0; + tempMatrix.r(2,1) = (T)0; + tempMatrix.r(3,0) = (T)0; + + NDArray result(A.ordering(), {4,1}, A.dataType(), A.getContext()); + result.r(0,0) = C.t(0,0); + result.r(1,0) = C.t(0,1); + result.r(2,0) = C.t(1,0); + result.r(3,0) = C.t(1,1); + + FullPivLU::solve(tempMatrix, result, result); + + X.r(0,0) = result.t(0); + X.r(0,1) = result.t(1); + X.r(1,0) = result.t(2); + X.r(1,1) = result.t(3); +} + + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtmQuasiTrianOffDiag(const NDArray& matrixT, NDArray& sqrtT ) { + + const int rows = matrixT.sizeAt(0); + + for (int j = 1; j < rows; j++) { + + if (matrixT.t(j, j-1) != (T)0) + continue; + + for (int i = j - 1; i >= 0; i--) { + + if (i > 0 && matrixT.t(i, i-1) != (T)0) + continue; + + const bool iBlockIs2x2 = (i < rows - 1) && (matrixT.t(i+1, i) != (T)0); + const bool jBlockIs2x2 = (j < rows - 1) && (matrixT.t(j+1, j) != (T)0); + + if (iBlockIs2x2 && jBlockIs2x2) { + + NDArray A = sqrtT({i,i+2, i,i+2}, true); + NDArray B = sqrtT({j,j+2, j,j+2}, true); + NDArray X = matrixT({i,i+2, j,j+2}, true);//.dup(); + + if (j - i > 2) + X -= mmul(sqrtT({i,i+2, i+2,j}, true), sqrtT({i+2,j, j,j+2}, true)); + + sqrtmQuasiTrianAuxEq(A, B, X, X); + + sqrtT.syncToDevice(); + sqrtT({i,i+2, j,j+2}, true).assign(X); + } + else if (iBlockIs2x2 && !jBlockIs2x2) { + + NDArray rhs = matrixT({i,i+2, j,j+1}, true);//.dup(); + + if (j - i > 2) + rhs -= mmul(sqrtT({i,i+2, i+2,j}, true), sqrtT({i+2,j, j,j+1}, true)); + + NDArray A(matrixT.ordering(), {2,2}, matrixT.dataType(), matrixT.getContext()); + A.r(0,0) = A.r(1,1) = sqrtT.t(j,j); + A.r(0,1) = A.r(1,0) = T(0); + A += sqrtT({i,i+2, i,i+2}, true); + + FullPivLU::solve(A,rhs,rhs); + + // sqrtT.syncToDevice(); + sqrtT({i,i+2, j,j+1}, true).assign(rhs); + } + else if (!iBlockIs2x2 && jBlockIs2x2) { + + NDArray rhs = matrixT({i,i+1, 
j,j+2}, true);//.dup(); + + if (j - i > 1) + rhs -= mmul(sqrtT({i,i+1, i+1,j}, true), sqrtT({i+1,j, j,j+2}, true)); + + NDArray A(matrixT.ordering(), {2,2}, matrixT.dataType(), matrixT.getContext()); + A.r(0,0) = A.r(1,1) = sqrtT.t(i,i); + A.r(0,1) = A.r(1,0) = T(0); + A += sqrtT({j,j+2, j,j+2}, true).transpose(); + + NDArray rhsT = rhs.transpose(); + FullPivLU::solve(A,rhsT,rhsT); + + // sqrtT.syncToDevice(); + sqrtT({i,i+1, j,j+2}, true).assign(rhs); + } + else if (!iBlockIs2x2 && !jBlockIs2x2) { + + T temp = mmul(sqrtT({i,i+1, i+1,j}), sqrtT({i+1,j, j,j+1})).t(0); // dot + sqrtT.r(i,j) = (matrixT.t(i,j) - temp ) / (sqrtT.t(i,i) + sqrtT.t(j,j)); + } + } + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Sqrtm::calc(const NDArray& in, NDArray& out) { + + if(in.rankOf() != 2 || in.sizeAt(0) != in.sizeAt(1)) + throw std::runtime_error("ops::helpers::Sqrtm::calc: input matrix must have rank 2 and be square !"); + if(!out.isSameShape(in)) + throw std::runtime_error("ops::helpers::Sqrtm::calc: output matrix must have the same shape as input one!"); + + if(in.lengthOf() == 1) { + out.r(0) = math::nd4j_sqrt(in.t(0)); + return; + } + + ops::helpers::Schur schur(in); + + const NDArray& t1 = schur._T; + const NDArray& t2 = schur._U; + + NDArray sqrtT = in.ulike(); + sqrtT.nullify(); + + sqrtmQuasiTrianDiag(schur._T, sqrtT); + sqrtmQuasiTrianOffDiag(schur._T, sqrtT); + + // out = U * sqrtT * U^T; + NDArray temp = mmul(sqrtT, schur._U.transpose()); + MmulHelper::mmul(&schur._U, &temp, &out); +} + +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; +template class ND4J_EXPORT Sqrtm; + + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/biDiagonalUp.cpp b/libnd4j/include/helpers/impl/biDiagonalUp.cpp new file mode 100644 index 000000000..d5326c21a --- /dev/null +++ b/libnd4j/include/helpers/impl/biDiagonalUp.cpp @@ -0,0 +1,160 @@ 
+/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 18.12.2017 +// + + +#include +#include + + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(NDArray(matrix.ordering(), {matrix.sizeAt(0), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())), + _HHbidiag(NDArray(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext())) { + + // input validation + if(matrix.rankOf() != 2 || matrix.isScalar()) + throw std::runtime_error("ops::helpers::biDiagonalizeUp constructor: input array must be 2D matrix !"); + + _HHmatrix.assign(&matrix); + _HHbidiag.assign(0.); + + evalData(); +} + +template +void BiDiagonalUp::_evalData() { + + const auto rows = _HHmatrix.sizeAt(0); + const auto cols = _HHmatrix.sizeAt(1); + + if(rows < cols) + throw std::runtime_error("ops::helpers::BiDiagonalizeUp::evalData method: this procedure is applicable only for input matrix with rows >= cols !"); + + T coeff, normX; + + T x, y; + + for(Nd4jLong i = 0; i < cols-1; ++i ) { + + // evaluate Householder matrix nullifying columns + NDArray column1 = 
_HHmatrix({i,rows, i,i+1}); + + x = _HHmatrix.t(i,i); + y = _HHbidiag.t(i,i); + + Householder::evalHHmatrixDataI(column1, x, y); + + _HHmatrix.r(i, i) = x; + _HHbidiag.r(i, i) = y; + + // multiply corresponding matrix block on householder matrix from the left: P * bottomRightCorner + NDArray bottomRightCorner1 = _HHmatrix({i,rows, i+1,cols}, true); // {i, cols} + Householder::mulLeft(bottomRightCorner1, _HHmatrix({i+1,rows, i,i+1}, true), _HHmatrix.t(i,i)); + + if(i == cols-2) + continue; // do not apply right multiplying at last iteration + + // evaluate Householder matrix nullifying rows + NDArray row1 = _HHmatrix({i,i+1, i+1,cols}); + + x = _HHmatrix.t(i,i+1); + y = _HHbidiag.t(i,i+1); + + Householder::evalHHmatrixDataI(row1, x, y); + + _HHmatrix.r(i, i+1) = x; + _HHbidiag.r(i, i+1) = y; + + // multiply corresponding matrix block on householder matrix from the right: bottomRightCorner * P + NDArray bottomRightCorner2 = _HHmatrix({i+1,rows, i+1,cols}, true); // {i, rows} + + Householder::mulRight(bottomRightCorner2, _HHmatrix({i,i+1, i+2,cols}, true), _HHmatrix.t(i,i+1)); + } + + NDArray row2 =_HHmatrix({cols-2,cols-1, cols-1,cols}); + + x = _HHmatrix.t(cols-2,cols-1); + y = _HHbidiag.t(cols-2,cols-1); + + Householder::evalHHmatrixDataI(row2, x, y); + + _HHmatrix.r(cols-2,cols-1) = x; + _HHbidiag.r(cols-2,cols-1) = y; + + NDArray column2 = _HHmatrix({cols-1,rows, cols-1,cols}); + + x = _HHmatrix.t(cols-1,cols-1); + y = _HHbidiag.t(cols-1,cols-1); + + Householder::evalHHmatrixDataI(column2, x, y); + + _HHmatrix.r(cols-1, cols-1) = x; + _HHbidiag.r(cols-1, cols-1) = y; +} + +////////////////////////////////////////////////////////////////////////// +void BiDiagonalUp::evalData() { + auto xType = _HHmatrix.dataType(); + BUILD_SINGLE_SELECTOR(xType, _evalData, ();, FLOAT_TYPES); +} + +////////////////////////////////////////////////////////////////////////// +template +HHsequence BiDiagonalUp::makeHHsequence_(const char type) { + + const int diagSize = type == 'u' ? 
_HHbidiag.sizeAt(0) : _HHbidiag.sizeAt(0) - 1; + + _hhCoeffs = NDArray(_HHmatrix.ordering(), {diagSize}, _HHmatrix.dataType(), _HHmatrix.getContext()); + + if(type == 'u') + for(int i = 0; i < diagSize; ++i) + _hhCoeffs.r(i) = _HHmatrix.t(i,i); + else + for(int i = 0; i < diagSize; ++i) + _hhCoeffs.r(i) = _HHmatrix.t(i,i+1); + + HHsequence result(_HHmatrix, _hhCoeffs, type); + + if(type != 'u') { + result._diagSize = diagSize; + result._shift = 1; + } + + return result; +} + +////////////////////////////////////////////////////////////////////////// +HHsequence BiDiagonalUp::makeHHsequence(const char type) { + auto xType = _HHmatrix.dataType(); + BUILD_SINGLE_SELECTOR(xType, return makeHHsequence_, (type);, FLOAT_TYPES); +} + + + +BUILD_SINGLE_TEMPLATE(template void BiDiagonalUp::_evalData, (), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template HHsequence BiDiagonalUp::makeHHsequence_, (const char type), FLOAT_TYPES); + +} +} +} \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/hhColPivQR.cpp b/libnd4j/include/helpers/impl/hhColPivQR.cpp new file mode 100644 index 000000000..6f4bbebc9 --- /dev/null +++ b/libnd4j/include/helpers/impl/hhColPivQR.cpp @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 11.01.2018 +// + +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +HHcolPivQR::HHcolPivQR(const NDArray& matrix) { + + _qr = matrix.dup(); + _diagSize = math::nd4j_min(matrix.sizeAt(0), matrix.sizeAt(1)); + _coeffs = NDArray(matrix.ordering(), {1, _diagSize}, matrix.dataType(), matrix.getContext()); + + _permut = NDArray(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(1)}, matrix.dataType(), matrix.getContext()); + + evalData(); +} + + void HHcolPivQR::evalData() { + BUILD_SINGLE_SELECTOR(_qr.dataType(), _evalData, (), FLOAT_TYPES); + } + +////////////////////////////////////////////////////////////////////////// +template +void HHcolPivQR::_evalData() { + + const int rows = _qr.sizeAt(0); + const int cols = _qr.sizeAt(1); + + NDArray transp(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + NDArray normsUpd(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + NDArray normsDir(_qr.ordering(), {cols}/*{1, cols}*/, _qr.dataType(), _qr.getContext()); + + int transpNum = 0; + + for (int k = 0; k < cols; ++k) + normsDir.r(k) = normsUpd.r(k) = _qr({0,0, k,k+1}).reduceNumber(reduce::Norm2).t(0); + + T normScaled = (normsUpd.reduceNumber(reduce::Max)).t(0) * DataTypeUtils::eps(); + T threshold1 = normScaled * normScaled / (T)rows; + T threshold2 = math::nd4j_sqrt(DataTypeUtils::eps()); + + T nonZeroPivots = _diagSize; + T maxPivot = 0.; + + for(int k = 0; k < _diagSize; ++k) { + + int biggestColIndex = normsUpd({k,-1}).indexReduceNumber(indexreduce::IndexMax).e(0); + T biggestColNorm = normsUpd({k,-1}).reduceNumber(reduce::Max).t(0); + T biggestColSqNorm = biggestColNorm * biggestColNorm; + biggestColIndex += k; + + if(nonZeroPivots == (T)_diagSize && 
biggestColSqNorm < threshold1 * (T)(rows-k)) + nonZeroPivots = k; + + transp.r(k) = (T)biggestColIndex; + + if(k != biggestColIndex) { + + NDArray temp1(_qr({0,0, k,k+1})); + NDArray temp2(_qr({0,0, biggestColIndex,biggestColIndex+1})); + temp1.swapUnsafe(temp2); + + math::nd4j_swap(normsUpd.r(k), normsUpd.r(biggestColIndex)); + math::nd4j_swap(normsDir.r(k), normsDir.r(biggestColIndex)); + + ++transpNum; + } + + T normX, c; + NDArray qrBlock = _qr({k,rows, k,k+1}); + Householder::evalHHmatrixDataI(qrBlock, c, normX); + + _coeffs.r(k) = c; + + _qr.r(k,k) = normX; + + T max = math::nd4j_abs(normX); + if(max > maxPivot) + maxPivot = max; + + if(k < rows && (k+1) < cols) { + NDArray qrBlock = _qr({k,rows, k+1,cols}, true); + NDArray tail = _qr({k+1,rows, k, k+1}, true); + Householder::mulLeft(qrBlock, tail, _coeffs.t(k)); + } + + for (int j = k + 1; j < cols; ++j) { + + if (normsUpd.t(j) != (T)0.f) { + + T temp = math::nd4j_abs(_qr.t(k, j)) / normsUpd.t(j); + temp = ((T)1. + temp) * ((T)1. - temp); + temp = temp < (T)0. ? (T)0. 
: temp; + T temp2 = temp * normsUpd.t(j) * normsUpd.t(j) / (normsDir.t(j)*normsDir.t(j)); + + if (temp2 <= threshold2) { + if(k+1 < rows && j < cols) + normsDir.r(j) = _qr({k+1,rows, j,j+1}).reduceNumber(reduce::Norm2).t(0); + + normsUpd.r(j) = normsDir.t(j); + } + else + normsUpd.r(j) = normsUpd.t(j) * math::nd4j_sqrt(temp); + } + } + } + + _permut.setIdentity(); + + for(int k = 0; k < _diagSize; ++k) { + + int idx = transp.e(k); + NDArray temp1 = _permut({0,0, k, k+1}); + NDArray temp2 = _permut({0,0, idx,idx+1}); + temp1.swapUnsafe(temp2); + } +} + +BUILD_SINGLE_TEMPLATE(template void HHcolPivQR::_evalData, (), FLOAT_TYPES); + +} +} +} + diff --git a/libnd4j/include/helpers/cpu/hhSequence.cpp b/libnd4j/include/helpers/impl/hhSequence.cpp similarity index 59% rename from libnd4j/include/helpers/cpu/hhSequence.cpp rename to libnd4j/include/helpers/impl/hhSequence.cpp index 8a2a35329..dc038dfc8 100644 --- a/libnd4j/include/helpers/cpu/hhSequence.cpp +++ b/libnd4j/include/helpers/impl/hhSequence.cpp @@ -20,7 +20,6 @@ #include #include -#include namespace sd { namespace ops { @@ -29,40 +28,32 @@ namespace helpers { ////////////////////////////////////////////////////////////////////////// HHsequence::HHsequence(const NDArray& vectors, const NDArray& coeffs, const char type): _vectors(vectors), _coeffs(coeffs) { - + _diagSize = sd::math::nd4j_min(_vectors.sizeAt(0), _vectors.sizeAt(1)); - _shift = 0; + _shift = 0; _type = type; } ////////////////////////////////////////////////////////////////////////// template -void HHsequence::_mulLeft(NDArray& matrix) { +void HHsequence::mulLeft_(NDArray& matrix) { const int rows = _vectors.sizeAt(0); const int cols = _vectors.sizeAt(1); - const int inRows = matrix.sizeAt(0); + const int inRows = matrix.sizeAt(0); - NDArray* block(nullptr); + for(int i = _diagSize - 1; i >= 0; --i) { - for(int i = _diagSize - 1; i >= 0; --i) { - if(_type == 'u') { - - block = new NDArray(matrix({inRows-rows+_shift+ i,inRows, 0,0}, true)); - T _x = 
_coeffs.e(i); - Householder::mulLeft(*block, _vectors({i + 1 + _shift, rows, i, i+1}, true), _x); - _coeffs.p(i, _x); + + NDArray block = matrix({inRows-rows+_shift+ i,inRows, 0,0}, true); + Householder::mulLeft(block, _vectors({i + 1 + _shift, rows, i, i+1}, true), _coeffs.t(i)); } else { - block = new NDArray(matrix({inRows-cols+_shift+i,inRows, 0,0}, true)); - T _x = _coeffs.e(i); - Householder::mulLeft(*block, _vectors({i, i+1, i + 1 + _shift, cols}, true), _x); - _coeffs.p(i, _x); + NDArray block = matrix({inRows-cols+_shift+i,inRows, 0,0}, true); + Householder::mulLeft(block, _vectors({i, i+1, i + 1 + _shift, cols}, true), _coeffs.t(i)); } - - delete block; } } @@ -70,55 +61,51 @@ void HHsequence::_mulLeft(NDArray& matrix) { ////////////////////////////////////////////////////////////////////////// NDArray HHsequence::getTail(const int idx) const { - + int first = idx + 1 + _shift; - + if(_type == 'u') return _vectors({first, -1, idx, idx+1}, true); else - return _vectors({idx, idx+1, first, -1}, true); + return _vectors({idx, idx+1, first, -1}, true); } - ////////////////////////////////////////////////////////////////////////// template -void HHsequence::_applyTo(NDArray& dest) { - +void HHsequence::applyTo_(NDArray& dest) { + int size = _type == 'u' ? 
_vectors.sizeAt(0) : _vectors.sizeAt(1); if(dest.rankOf() != 2 || (dest.sizeAt(0) != size && dest.sizeAt(1) != size)) - dest = NDArrayFactory::create(dest.ordering(), {size, size}, dest.dataType(), dest.getContext()); + dest = NDArray(dest.ordering(), {size, size}, dest.dataType(), dest.getContext()); dest.setIdentity(); - + for(int k = _diagSize - 1; k >= 0; --k) { - + int curNum = size - k - _shift; if(curNum < 1 || (k + 1 + _shift) >= size ) continue; auto block = dest({dest.sizeAt(0)-curNum,dest.sizeAt(0), dest.sizeAt(1)-curNum,dest.sizeAt(1)}, true); - T _x = _coeffs.e(k); - Householder::mulLeft(block, getTail(k), _x); - - _coeffs.p(k, _x); - } -} - - - void HHsequence::applyTo(NDArray& dest) { - auto xType = _coeffs.dataType(); - - BUILD_SINGLE_SELECTOR(xType, _applyTo, (dest), FLOAT_TYPES); + Householder::mulLeft(block, getTail(k), _coeffs.t(k)); } +} - void HHsequence::mulLeft(NDArray& matrix) { - auto xType = _coeffs.dataType(); +////////////////////////////////////////////////////////////////////////// +void HHsequence::applyTo(NDArray& dest) { + auto xType = _coeffs.dataType(); + BUILD_SINGLE_SELECTOR(xType, applyTo_, (dest), FLOAT_TYPES); +} - BUILD_SINGLE_SELECTOR(xType, _mulLeft, (matrix), FLOAT_TYPES); - } +////////////////////////////////////////////////////////////////////////// +void HHsequence::mulLeft(NDArray& matrix) { + auto xType = _coeffs.dataType(); + BUILD_SINGLE_SELECTOR(xType, mulLeft_, (matrix), FLOAT_TYPES); +} + +BUILD_SINGLE_TEMPLATE(template void HHsequence::applyTo_, (sd::NDArray &dest), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void HHsequence::mulLeft_, (NDArray& matrix), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void HHsequence::_applyTo, (sd::NDArray &dest), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void HHsequence::_mulLeft, (NDArray& matrix), FLOAT_TYPES); } } } diff --git a/libnd4j/include/helpers/impl/householder.cpp b/libnd4j/include/helpers/impl/householder.cpp new file mode 100644 index 000000000..e9572f9f6 
--- /dev/null +++ b/libnd4j/include/helpers/impl/householder.cpp @@ -0,0 +1,218 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// Created by Yurii Shyrma on 18.12.2017 +// + +#include + +namespace sd { +namespace ops { +namespace helpers { + + +////////////////////////////////////////////////////////////////////////// +// template +// NDArray Householder::evalHHmatrix(const NDArray& x) { + +// // input validation +// if(x.rankOf() != 1 && !x.isScalar()) +// throw std::runtime_error("ops::helpers::Householder::evalHHmatrix method: iinput array must have rank = 1 or to be scalar!"); + +// const auto xLen = x.lengthOf(); + +// NDArray w(x.ordering(), {xLen, 1}, x.dataType(), x.getContext()); // column-vector + +// NDArray xTail = xLen > 1 ? x({1,-1}) : NDArray(); +// T tailXnorm = xLen > 1 ? 
xTail.reduceNumber(reduce::SquaredNorm).t(0) : (T)0; + +// const auto xFirstElem = x.t(0); + +// T coeff, normX; + +// if(tailXnorm <= DataTypeUtils::min()) { + +// normX = xFirstElem; +// coeff = 0.f; +// if(xLen > 1) +// w({1,-1, 0,0}) = 0.f; +// } +// else { + +// normX = math::nd4j_sqrt(xFirstElem*xFirstElem + tailXnorm); + +// if(xFirstElem >= (T)0.f) +// normX = -normX; // choose opposite sign to lessen roundoff error + +// coeff = (normX - xFirstElem) / normX; + +// if(xLen > 1) +// w({1,-1, 0,0}).assign(xTail / (xFirstElem - normX)); +// } + +// w.t(0) = (T)1; + +// NDArray identity(x.ordering(), {xLen, xLen}, x.dataType(), x.getContext()); +// identity.setIdentity(); // identity matrix + +// return identity - mmul(w, w.transpose()) * coeff; +// } + +////////////////////////////////////////////////////////////////////////// +template +void Householder::evalHHmatrixData(const NDArray& x, NDArray& tail, T& coeff, T& normX) { + + // input validation + if(x.rankOf() != 1 && !x.isScalar()) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input array must have rank = 1 or to be scalar!"); + + if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixData method: input tail vector must have length less than unity compared to input x vector!"); + + const auto xLen = x.lengthOf(); + + const NDArray xTail = xLen > 1 ? x({1,-1}) : NDArray(); + + T tailXnorm = xLen > 1 ? 
xTail.reduceNumber(reduce::SquaredNorm).t(0) : (T)0; + + const auto xFirstElem = x.t(0); + + if(tailXnorm <= DataTypeUtils::min()) { + + normX = xFirstElem; + coeff = (T)0.f; + tail = (T)0.f; + } + else { + + normX = math::nd4j_sqrt(xFirstElem*xFirstElem + tailXnorm); + + if(xFirstElem >= (T)0.f) + normX = -normX; // choose opposite sign to lessen roundoff error + + coeff = (normX - xFirstElem) / normX; + + tail.assign(xTail / (xFirstElem - normX)); + } +} + +////////////////////////////////////////////////////////////////////////// +template +void Householder::evalHHmatrixDataI(NDArray& x, T& coeff, T& normX) { + + // input validation + if(x.rankOf() != 1 && !x.isScalar()) + throw std::runtime_error("ops::helpers::Householder::evalHHmatrixDataI method: input array must have rank = 1 or to be scalar!"); + + int rows = (int)x.lengthOf()-1; + int num = 1; + + if(rows == 0) { + rows = 1; + num = 0; + } + + NDArray tail = x({num, -1}); + + evalHHmatrixData(x, tail, coeff, normX); +} + +////////////////////////////////////////////////////////////////////////// +template +void Householder::mulLeft(NDArray& matrix, const NDArray& tail, const T coeff) { + + // if(matrix.rankOf() != 2) + // throw "ops::helpers::Householder::mulLeft method: input array must be 2D matrix !"; + + if(matrix.sizeAt(0) == 1 && coeff != (T)0) { + + matrix *= (T) 1.f - coeff; + } + else if(coeff != (T)0.f) { + + NDArray bottomPart = matrix({1,matrix.sizeAt(0), 0,0}, true); + NDArray fistRow = matrix({0,1, 0,0}, true); + + if(tail.isColumnVector()) { + + auto resultingRow = mmul(tail.transpose(), bottomPart); + resultingRow += fistRow; + resultingRow *= coeff; + fistRow -= resultingRow; + bottomPart -= mmul(tail, resultingRow); + } + else { + + auto resultingRow = mmul(tail, bottomPart); + resultingRow += fistRow; + resultingRow *= coeff; + fistRow -= resultingRow; + bottomPart -= mmul(tail.transpose(), resultingRow); + } + } +} + + 
+////////////////////////////////////////////////////////////////////////// +template +void Householder::mulRight(NDArray& matrix, const NDArray& tail, const T coeff) { + + // if(matrix.rankOf() != 2) + // throw "ops::helpers::Householder::mulRight method: input array must be 2D matrix !"; + + if(matrix.sizeAt(1) == 1 && coeff != (T)0) { + matrix *= (T)1.f - coeff; + } + else if(coeff != (T)0.f) { + + NDArray rightPart = matrix({0,0, 1,matrix.sizeAt(1)}, true); + NDArray fistCol = matrix({0,0, 0,1}, true); + + if(tail.isColumnVector()) { + + auto resultingCol = mmul(rightPart, tail); + resultingCol += fistCol; + resultingCol *= coeff; + fistCol -= resultingCol; + rightPart -= mmul(resultingCol, tail.transpose()); + } + else { + + auto resultingCol = mmul(rightPart, tail.transpose()); + resultingCol += fistCol; + resultingCol *= coeff; + fistCol -= resultingCol; + rightPart -= mmul(resultingCol, tail); + } + } +} + + +template class ND4J_EXPORT Householder; +template class ND4J_EXPORT Householder; +template class ND4J_EXPORT Householder; +template class ND4J_EXPORT Householder; + + + + + + + +} +} +} diff --git a/libnd4j/include/helpers/cpu/jacobiSVD.cpp b/libnd4j/include/helpers/impl/jacobiSVD.cpp similarity index 58% rename from libnd4j/include/helpers/cpu/jacobiSVD.cpp rename to libnd4j/include/helpers/impl/jacobiSVD.cpp index 372a2a409..7fbf183b2 100644 --- a/libnd4j/include/helpers/cpu/jacobiSVD.cpp +++ b/libnd4j/include/helpers/impl/jacobiSVD.cpp @@ -20,8 +20,7 @@ #include #include -#include - +#include namespace sd { namespace ops { @@ -43,27 +42,27 @@ JacobiSVD::JacobiSVD(const NDArray& matrix, const bool calcU, const bool calc _calcV = calcV; _fullUV = fullUV; - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); + _s = NDArray(matrix.ordering(), {_diagSize, 1}, matrix.dataType(), matrix.getContext()); if(_calcU) { if(_fullUV) - _u = NDArrayFactory::create(matrix.ordering(), {_rows, _rows}, 
matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, _rows}, matrix.dataType(), matrix.getContext()); else - _u = NDArrayFactory::create(matrix.ordering(), {_rows, _diagSize}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, _diagSize}, matrix.dataType(), matrix.getContext()); } else - _u = NDArrayFactory::create(matrix.ordering(), {_rows, 1}, matrix.dataType(), matrix.getContext()); + _u = NDArray(matrix.ordering(), {_rows, 1}, matrix.dataType(), matrix.getContext()); if(_calcV) { if(_fullUV) - _v = NDArrayFactory::create(matrix.ordering(), {_cols, _cols}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, _cols}, matrix.dataType(), matrix.getContext()); else - _v = NDArrayFactory::create(matrix.ordering(), {_cols, _diagSize}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, _diagSize}, matrix.dataType(), matrix.getContext()); } else - _v = NDArrayFactory::create(matrix.ordering(), {_cols, 1}, matrix.dataType(), matrix.getContext()); + _v = NDArray(matrix.ordering(), {_cols, 1}, matrix.dataType(), matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); + _m = NDArray(matrix.ordering(), {_diagSize, _diagSize}, matrix.dataType(), matrix.getContext()); evalData(matrix); } @@ -77,16 +76,19 @@ void JacobiSVD::mulRotationOnLeft(const int i, const int j, NDArray& block, c if(j+1 > block.sizeAt(0)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnLeft: second arguments is out of array row range !"); - auto pTemp = block({i,j+1,j-i, 0,0,0}, true, true); - auto temp = pTemp; - pTemp.assign(mmul(rotation, temp)); + auto temp = block({i,j+1,j-i, 0,0,0}, true, true); + temp.assign(mmul(rotation, temp)); + + // auto pTemp = block({i,j+1,j-i, 0,0,0}, true, true); + // auto temp = pTemp.dup(); + // pTemp.assign(mmul(rotation, temp)); } else { if(j+1 > 
block.sizeAt(0) || i+1 > block.sizeAt(0)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnLeft: some or both integer arguments are out of array row range !"); - auto temp = NDArrayFactory::create(block.ordering(), {2, block.sizeAt(1)}, block.dataType(), block.getContext()); + NDArray temp(block.ordering(), {2, block.sizeAt(1)}, block.dataType(), block.getContext()); auto row1 = block({i,i+1, 0,0}, true); auto row2 = block({j,j+1, 0,0}, true); auto rowTemp1 = temp({0,1, 0,0}, true); @@ -108,16 +110,19 @@ void JacobiSVD::mulRotationOnRight(const int i, const int j, NDArray& block, if(j+1 > block.sizeAt(1)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnRight: second argument is out of array column range !"); - auto pTemp = block({0,0,0, i,j+1,j-i}, true, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, rotation)); + auto temp = block({0,0,0, i,j+1,j-i}, true, true); + temp.assign(mmul(temp, rotation)); + + // auto pTemp = block({0,0,0, i,j+1,j-i}, true, true); + // auto temp = pTemp.dup(); + // pTemp.assign(mmul(temp, rotation)); } else { if(j+1 > block.sizeAt(1) || i+1 > block.sizeAt(1)) throw std::runtime_error("ops::helpers::JacobiSVD mulRotationOnRight: some or both integer arguments are out of array column range !"); - auto temp = NDArrayFactory::create(block.ordering(), {block.sizeAt(0), 2}, block.dataType(), block.getContext()); + NDArray temp(block.ordering(), {block.sizeAt(0), 2}, block.dataType(), block.getContext()); auto col1 = block({0,0, i,i+1}, true); auto col2 = block({0,0, j,j+1}, true); auto colTemp1 = temp({0,0, 0,1}, true); @@ -134,123 +139,148 @@ void JacobiSVD::mulRotationOnRight(const int i, const int j, NDArray& block, template bool JacobiSVD::isBlock2x2NotDiag(NDArray& block, int p, int q, T& maxElem) { - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - T n = math::nd4j_sqrt(block.e(p,p) * block.e(p,p) + block.e(q,p) * block.e(q,p)); + NDArray 
rotation(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + + T n = math::nd4j_sqrt(block.t(p, p) * block.t(p, p) + block.t(q, p)*block.t(q, p)); const T almostZero = DataTypeUtils::min(); const T precision = DataTypeUtils::eps(); if(n == (T)0.f) { - block.p(p, p, 0.f); - block.p(q, p, 0.f); + block.r(p, p) = (T)0; + block.r(q, p) = (T)0; } else { - T v = block.e(p, p) / n; + T v = block.t(p, p) / n; - rotation.p(0, 0, v); - rotation.p(1,1, v); + rotation.r(0,0) = rotation.r(1,1) = v; - v = block.e(q,p) / n; - rotation.p(0, 1, v); + v = block.t(q, p) / n; + rotation.r(0,1) = v; - rotation.p(1,0, -rotation.template e(0, 1)); + rotation.r(1,0) = -rotation.template t(0,1); mulRotationOnLeft(p, q, block, rotation); - if(_calcU) { - auto temp2 = rotation.transpose(); - mulRotationOnRight(p, q, _u, temp2); - } + if(_calcU) + mulRotationOnRight(p, q, _u, rotation.transpose()); } - maxElem = math::nd4j_max(maxElem, math::nd4j_max(math::nd4j_abs(block.e(p,p)), math::nd4j_abs(block.e(q,q)))); + maxElem = math::nd4j_max(maxElem, math::nd4j_max(math::nd4j_abs(block.t(p, p)), math::nd4j_abs(block.t(q, q)))); T threshold = math::nd4j_max(almostZero, precision * maxElem); - const bool condition1 = math::nd4j_abs(block.e(p,q)) > threshold; - const bool condition2 = math::nd4j_abs(block.e(q,p)) > threshold; - return condition1 || condition2; + return math::nd4j_abs(block.t(p, q)) > threshold || math::nd4j_abs(block.t(q, p)) > threshold; } ////////////////////////////////////////////////////////////////////////// template bool JacobiSVD::createJacobiRotation(const T& x, const T& y, const T& z, NDArray& rotation) { - T denom = 2.* math::nd4j_abs(y); + T denom = (T)(2.f)* math::nd4j_abs(y); if(denom < DataTypeUtils::min()) { - rotation.p(0,0, 1.f); - rotation.p(1,1, 1.f); - rotation.p(0,1, 0.f); - rotation.p(1,0, 0.f); + rotation.r(0,0) = rotation.r(1,1) = (T)1.f; + rotation.r(0,1) = rotation.r(1,0) = (T)0.f; + return false; } else { T tau = (x-z)/denom; - T w = 
math::nd4j_sqrt(tau*tau + 1.); + T w = math::nd4j_sqrt(tau*tau + (T)1.f); T t; if(tau > (T)0.) - t = 1. / (tau + w); + t = (T)1.f / (tau + w); else - t = 1. / (tau - w); + t = (T)1.f / (tau - w); - T sign = t > (T)0. ? 1. : -1.; - T n = 1. / math::nd4j_sqrt(t*t + 1.f); - rotation.p(0,0, n); - rotation.p(1,1, n); + T sign = t > (T)0. ? (T)1.f : (T)-1.f; - rotation.p(0,1, -sign * (y / math::nd4j_abs(y)) * math::nd4j_abs(t) * n); - rotation.p(1,0, -rotation.e(0,1)); + T cos = (T)1.f / math::nd4j_sqrt(t*t + (T)1.f); + T sin = -sign * (y / math::nd4j_abs(y)) * math::nd4j_abs(t) * cos; + + rotation.r(0,1) = sin; + rotation.r(1,0) = -sin; + rotation.r(0,0) = rotation.r(1,1) = cos; return true; } } + +////////////////////////////////////////////////////////////////////////// +template +void JacobiSVD::createJacobiRotationGivens(const T& p, const T& q, NDArray& rotation) { + + T cos, sin; + + if(q == (T)0) { + + cos = p < (T)0 ? (T)-1 : (T)1; + sin = (T)0; + } + else if(p == (T)0) { + + cos = (T)0; + sin = q < (T)0 ? 
(T)1 : (T)-1; + } + else if(math::nd4j_abs(p) > math::nd4j_abs(q)) { + + T t = q / p; + T u = math::nd4j_sqrt((T)1 + t*t); + if(p < (T)0) + u = -u; + cos = (T)1 / u; + sin = -t * cos; + } + else { + T t = p / q; + T u = math::nd4j_sqrt((T)1 + t*t); + if(q < (T)0) + u = -u; + sin = -(T)1 / u; + cos = -t * sin; + } + + rotation.r(0,1) = sin; + rotation.r(1,0) = -sin; + rotation.r(0,0) = rotation.r(1,1) = cos; +} + + ////////////////////////////////////////////////////////////////////////// template void JacobiSVD::svd2x2(const NDArray& block, int p, int q, NDArray& left, NDArray& right) { - auto m = NDArrayFactory::create(block.ordering(), {2, 2}, block.dataType(), block.getContext()); - m.p(0,0, block.e(p,p)); - m.p(0,1, block.e(p,q)); - m.p(1,0, block.e(q,p)); - m.p(1,1, block.e(q,q)); + NDArray m(block.ordering(), {2, 2}, block.dataType(), block.getContext()); + m.r(0,0) = block.t(p,p); + m.r(0,1) = block.t(p,q); + m.r(1,0) = block.t(q,p); + m.r(1,1) = block.t(q,q); - auto rotation = NDArrayFactory::create(block.ordering(), {2, 2}, block.dataType(), block.getContext()); - T t = m.e(0,0) + m.e(1,1); - T d = m.e(1,0) - m.e(0,1); + NDArray rotation(block.ordering(), {2, 2}, block.dataType(), block.getContext()); + T t = m.t(0,0) + m.t(1,1); + T d = m.t(1,0) - m.t(0,1); if(math::nd4j_abs(d) < DataTypeUtils::min()) { - rotation.p(0,0, 1.f); - rotation.p(1,1, 1.f); - rotation.p(0,1, 0.f); - rotation.p(1,0, 0.f); + rotation.r(0,0) = rotation.r(1,1) = (T)1; + rotation.r(0,1) = rotation.r(1,0) = (T)0; } else { T u = t / d; - T tmp = math::nd4j_sqrt(1. 
+ u*u); - rotation.p(0,0, u / tmp); - rotation.p(1,1, u / tmp); - rotation.p(0,1, 1.f / tmp); - rotation.p(1,0, -rotation.e(0,1)); + T tmp = math::nd4j_sqrt((T)1.f + u*u); + rotation.r(0,0) = rotation.r(1,1) = u / tmp; + rotation.r(0,1) = (T)1.f / tmp; + rotation.r(1,0) = -rotation.t(0,1); } m.assign(mmul(rotation, m)); - auto _x = m.e(0,0); - auto _y = m.e(0,1); - auto _z = m.e(1,1); + createJacobiRotation(m.t(0,0), m.t(0,1), m.t(1,1), right); - createJacobiRotation(_x, _y, _z, right); - - m.p(0, 0, _x); - m.p(0, 1, _y); - m.p(1, 1, _z); - - auto temp = right.transpose(); - left.assign(mmul(rotation, temp)); + left.assign(mmul(rotation, right.transpose())); } @@ -261,7 +291,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { const T precision = (T)2.f * DataTypeUtils::eps(); const T almostZero = DataTypeUtils::min(); - T scale = matrix.reduceNumber(reduce::AMax).e(0); + T scale = matrix.reduceNumber(reduce::AMax).template t(0); if(scale== (T)0.f) scale = (T)1.f; @@ -285,13 +315,12 @@ void JacobiSVD::evalData(const NDArray& matrix) { } else if(_rows < _cols) { - auto matrixT = matrix.transpose(); - HHcolPivQR qr(matrixT / scale); + HHcolPivQR qr(matrix.transpose() / scale); _m.assign(qr._qr({0,_rows, 0,_rows})); _m.fillAsTriangular(0., 0, 0, _m, 'l'); _m.transposei(); - HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here ! + HHsequence hhSeg(qr._qr, qr._coeffs, 'u'); // type = 'u' is not mistake here ! 
if(_fullUV) hhSeg.applyTo(_v); @@ -305,7 +334,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { } else { - _m.assign(static_cast(matrix({0,_diagSize, 0,_diagSize})) / scale); + _m.assign(matrix({0,_diagSize, 0,_diagSize}) / scale); if(_calcU) _u.setIdentity(); @@ -316,7 +345,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { T maxDiagElem = 0.; for(int i = 0; i < _diagSize; ++i) { - T current = math::nd4j_abs(_m.e(i,i)); + T current = math::nd4j_abs(_m.t(i,i)); if(maxDiagElem < current ) maxDiagElem = current; } @@ -333,29 +362,27 @@ void JacobiSVD::evalData(const NDArray& matrix) { T threshold = math::nd4j_max(almostZero, precision * maxDiagElem); - if(math::nd4j_abs(_m.e(p,q)) > threshold || math::nd4j_abs(_m.e(q,p)) > threshold){ + if(math::nd4j_abs(_m.t(p,q)) > threshold || math::nd4j_abs(_m.t(q,p)) > threshold){ stop = false; // if(isBlock2x2NotDiag(_m, p, q, maxDiagElem)) { - auto rotLeft = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); - auto rotRight = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + NDArray rotLeft(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); + NDArray rotRight(_m.ordering(), {2, 2}, _m.dataType(), _m.getContext()); svd2x2(_m, p, q, rotLeft, rotRight); mulRotationOnLeft(p, q, _m, rotLeft); - if(_calcU) { - auto temp = rotLeft.transpose(); - mulRotationOnRight(p, q, _u, temp); - } + if(_calcU) + mulRotationOnRight(p, q, _u, rotLeft.transpose()); mulRotationOnRight(p, q, _m, rotRight); if(_calcV) mulRotationOnRight(p, q, _v, rotRight); - maxDiagElem = math::nd4j_max(maxDiagElem, math::nd4j_max(math::nd4j_abs(_m.e(p,p)), math::nd4j_abs(_m.e(q,q)))); + maxDiagElem = math::nd4j_max(maxDiagElem, math::nd4j_max(math::nd4j_abs(_m.t(p,p)), math::nd4j_abs(_m.t(q,q)))); } } } @@ -363,8 +390,10 @@ void JacobiSVD::evalData(const NDArray& matrix) { } for(int i = 0; i < _diagSize; ++i) { - _s.p(i, math::nd4j_abs(_m.e(i,i))); - if(_calcU && _m.e(i,i) < (T)0.) 
{ + + _s.r(i) = math::nd4j_abs(_m.t(i,i)); + + if(_calcU && _m.t(i,i) < (T)0.) { auto temp = _u({0,0, i,i+1}, true); temp.applyTransform(transform::Neg, temp, nullptr); } @@ -375,7 +404,7 @@ void JacobiSVD::evalData(const NDArray& matrix) { for(int i = 0; i < _diagSize; i++) { int pos = (_s({i,-1, 0,0}).indexReduceNumber(indexreduce::IndexMax, nullptr)).template e(0); - T maxSingVal = _s({i,-1, 0,0}).reduceNumber(reduce::Max).template e(0); + T maxSingVal = _s({i,-1, 0,0}).reduceNumber(reduce::Max).template t(0); if(maxSingVal == (T)0.) break; @@ -384,34 +413,24 @@ void JacobiSVD::evalData(const NDArray& matrix) { pos += i; - T _e0 = _s.e(i); - T _e1 = _s.e(pos); - _s.p(pos, _e0); - _s.p(i, _e1); - //math::nd4j_swap(_s(i), _s(pos)); + math::nd4j_swap(_s.r(i), _s.r(pos)); if(_calcU) { auto temp1 = _u({0,0, pos,pos+1}, true); auto temp2 = _u({0,0, i,i+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + temp1.swapUnsafe(temp2); } if(_calcV) { auto temp1 = _v({0,0, pos, pos+1}, true); auto temp2 = _v({0,0, i, i+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); + temp1.swapUnsafe(temp2); } } } } - - template class ND4J_EXPORT JacobiSVD; template class ND4J_EXPORT JacobiSVD; template class ND4J_EXPORT JacobiSVD; diff --git a/libnd4j/include/helpers/jacobiSVD.h b/libnd4j/include/helpers/jacobiSVD.h index f6f161bbb..615811e9a 100644 --- a/libnd4j/include/helpers/jacobiSVD.h +++ b/libnd4j/include/helpers/jacobiSVD.h @@ -31,13 +31,13 @@ namespace helpers { template class JacobiSVD { - public: + public: NDArray _m; NDArray _s; // vector with singular values NDArray _u; NDArray _v; - + int _diagSize; int _rows; int _cols; @@ -52,7 +52,8 @@ class JacobiSVD { bool isBlock2x2NotDiag(NDArray& block, int p, int q, T& maxElem); static bool createJacobiRotation(const T& x, const T& y, const T& z, NDArray& rotation); - + static void createJacobiRotationGivens(const T& p, const T& q, NDArray& rotation); + static void 
svd2x2(const NDArray& block, int p, int q, NDArray& left, NDArray& right); static void mulRotationOnLeft(const int i, const int j, NDArray& block, const NDArray& rotation); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 8cde62ea1..65cf29b66 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -528,7 +528,7 @@ namespace shape { * Returns the element wise stride for this information * buffer */ - ND4J_EXPORT _CUDA_HD Nd4jLong elementWiseStride(const Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong elementWiseStride(const Nd4jLong *shapeInfo); /** diff --git a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu index 334584fab..6d2bcadf5 100644 --- a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu +++ b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu @@ -31,23 +31,37 @@ namespace sd { auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; - __shared__ Nd4jLong resultLength; + __shared__ Nd4jLong resultLength, xEws, yEws; + __shared__ bool sameOffsets, sameOrders; __shared__ T* input; __shared__ T* output; + if (0 == threadIdx.x) { resultLength = shape::length(theFirstShape); input = reinterpret_cast(theSecondBuffer); output = reinterpret_cast(theFirstBuffer); + + sameOffsets = shape::haveSameShapeAndStrides(theFirstShape, theSecondShape); + sameOrders = shape::order(theFirstShape) == shape::order(theSecondShape); + + xEws = shape::elementWiseStride(theFirstShape); + yEws = shape::elementWiseStride(theSecondShape); } __syncthreads(); for (int i = tid; i < resultLength; i += totalThreads) { - auto xEws = shape::order(theFirstShape) == 'c'? shape::elementWiseStride(theFirstShape) :1; - auto yEws = shape::order(theSecondShape) == 'c'? 
shape::elementWiseStride(theSecondShape):1; - - auto xOffset = shape::getIndexOffset(i * xEws, theFirstShape); - auto yOffset = shape::getIndexOffset(i * yEws, theSecondShape); - sd::math::nd4j_swap(output[xOffset], input[yOffset]); + if(sameOrders && xEws > 0 && yEws > 0) { + sd::math::nd4j_swap(output[i*xEws], input[i*yEws]); + } + else if(sameOffsets) { + const auto offset = shape::getIndexOffset(i, theFirstShape); + sd::math::nd4j_swap(output[offset], input[offset]); + } + else{ + const auto xOffset = shape::getIndexOffset(i, theFirstShape); + const auto yOffset = shape::getIndexOffset(i, theSecondShape); + sd::math::nd4j_swap(output[xOffset], input[yOffset]); + } } } diff --git a/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp b/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp new file mode 100644 index 000000000..37472008d --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/linalg/sqrtm.cpp @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#include +#if NOT_EXCLUDED(OP_sqrtm) +#include +#include + + +namespace sd { +namespace ops { + +CONFIGURABLE_OP_IMPL(sqrtm, 1, 1, false, 0, 0) { + + auto input = INPUT_VARIABLE(0); + auto output = OUTPUT_VARIABLE(0); + + REQUIRE_TRUE(input->rankOf() > 1, 0, "CONFIGURABLE_OP sqrtm: input array rank is required to be > 1, but got %i instead !", input->rankOf()); + REQUIRE_TRUE(input->sizeAt(-2) == input->sizeAt(-1), 0, "CONFIGURABLE_OP sqrtm: two last dimensions of input array should be square matrices, but got such wrong shape instead: %s!", ShapeUtils::shapeAsString(input).c_str()); + + helpers::sqrtm(block.launchContext(), input, output); + + return Status::OK(); +} + +////////////////////////////////////////////////////////////////////////// +DECLARE_TYPES(sqrtm) { + getOpDescriptor()->setAllowedInputTypes(sd::DataType::ANY)->setAllowedOutputTypes({ALL_FLOATS}); +} + + + +} +} + +#endif \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/blas/svd.cpp b/libnd4j/include/ops/declarable/generic/linalg/svd.cpp similarity index 100% rename from libnd4j/include/ops/declarable/generic/blas/svd.cpp rename to libnd4j/include/ops/declarable/generic/linalg/svd.cpp diff --git a/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp b/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp index c9d23753c..49ec1e135 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/triangular_solve.cpp @@ -55,13 +55,13 @@ namespace sd { isLower = !isLower; }; - auto res = helpers::triangularSolveFunctor(block.launchContext(), input, b, isLower, useAdjoint, z); + auto res = helpers::triangularSolveFunctor(block.launchContext(), input, b, isLower, false, z); if (input != a) delete 
input; return Status::OK(); } - + DECLARE_SHAPE_FN(triangular_solve) { auto in0 = inputShape->at(1); auto in1 = inputShape->at(1); diff --git a/libnd4j/include/ops/declarable/headers/blas.h b/libnd4j/include/ops/declarable/headers/blas.h index 09215e113..6fd5a3894 100644 --- a/libnd4j/include/ops/declarable/headers/blas.h +++ b/libnd4j/include/ops/declarable/headers/blas.h @@ -24,7 +24,7 @@ namespace sd { namespace ops { - + /** * This op is general matmum implementation. Depending on inputs dimensionality output result might be different. * matrix x matrix = BLAS gemm @@ -75,11 +75,11 @@ namespace sd { * alpha: vector of T * beta: vector of T * ...: A, B matrices sequentially. i.e: AAAAABBBBB - * + * * Integer arguments: * transA, transB, M, N, K, ldA, ldB, ldC - usual BLAS gemm arguments * batchCount - number of operations in this batch - * + * * PLEASE NOTE: M, N, K, ldA, ldB, ldC should be equal for all matrices within batch. */ #if NOT_EXCLUDED(OP_batched_gemm) @@ -88,25 +88,39 @@ namespace sd { /** * performs singular value decomposition (SVD) of one or more matrices, evaluates the SVD of each inner-most 2D matrix in input array: - * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) + * x[..., :, :] = u[..., :, :] * s[...,:] * transpose(v[..., :, :]) * * Input array: * x[..., Rows, Cols], the necessary condition is: rank of x >= 2 - * + * * Outputs arrays: * s[..., diagSize] - array with singular values which are stored in decreasing order, diagSize is smaller among Rows and Cols * u[..., Rows, Rows] if IArgs[1] is true, else u[..., Rows, diagSize] - array with right singular vectors * v[..., Cols, Cols] if IArgs[1] is true, else v[..., Cols, diagSize] - array with left singular vectors - * + * * Integer arguments: * IArgs[0] - bool, whether to calculate u and v, s is calculated in any case * IArgs[1] - bool, whether to calculate full-sized u and v * IArgs[2] - the number of cols or rows which determines what algorithm to use. 
More precisely: * if diagSize < IArgs[2] then Jacobi algorithm is used, in opposite case the Divide-And-Conquer is applied - * Recommended value is 16. + * Recommended value is 16. */ #if NOT_EXCLUDED(OP_svd) - DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3); + DECLARE_CUSTOM_OP(svd, 1, 1, false, 0, 3); + #endif + + /** + * calculates square root of matrix such that + * x[..., M, M] = z[..., M, M] x z[..., M, M] + * + * Input array: + * x[..., M, M], the necessary condition is: rank of x >= 2 and equality of last two dimensions + * + * Outputs arrays: + * z - same shape as x + */ + #if NOT_EXCLUDED(OP_sqrtm) + DECLARE_CONFIGURABLE_OP(sqrtm, 1, 1, false, 0, 0); #endif } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp index ec06610b8..0056fec6d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/betaInc.cpp @@ -118,7 +118,7 @@ static void betaIncForArray(sd::LaunchContext * context, const NDArray& a, const auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) - output.t(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); + output.r(i) = betaIncCore(a.t(i), b.t(i), x.t(i)); }; samediff::Threads::parallel_for(func, 0, xLen); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp index 15ea569e8..ba04fd9aa 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/extract_patches.cpp @@ -73,7 +73,7 @@ namespace helpers { bool setUp = (theSame && row >= 0 && col >= 0 && row < rowDim && col < colDim) || (!theSame); if (setUp) { - outMatrix->t(i, j, pos) = patch->e(row, col, pixel); + outMatrix->r(i, j, pos) = patch->e(row, col, pixel); } pos++; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index d2c918da9..7317f8a73 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -73,7 +73,7 @@ namespace helpers { else if (val >= nudged_max) val = nudged_max; // quantization itself - output->t(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; + output->r(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp index 2f0f00779..68b2130ac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/image_resize.cpp @@ -318,7 +318,7 @@ namespace helpers { } // copy pixel over all channels for (Nd4jLong e = 0; e < channels; e++) - output->t(b, y, x, e) = images->t(b, inY, inX, e); + output->r(b, y, x, e) = images->t(b, inY, inX, e); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp index 675fb2794..204b05530 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp @@ -40,7 +40,7 @@ namespace helpers { for (auto x = 0; x < lastDims.size(); x++) { for (auto r = 0; r < rows; r++) { - lastDims[x]->t(r,r) = (T)value; + lastDims[x]->r(r,r) = (T)value; } } @@ -71,7 +71,7 @@ namespace helpers { if (err) return err; // alternate moment: inverse lower triangular matrix to solve equation A'x = b' => L^Tx = L^-1 * b' // solve one upper triangular system (to avoid float problems) - + // 5. 
Solve two triangular systems: auto rightB = rightOutput.ulike(); helpers::triangularSolveFunctor(context, &leftOutput, &rightOutput, true, false, &rightB); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index 0f435cfdb..482709455 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -34,7 +34,7 @@ namespace helpers { if (theFirst != theSecond) for (int i = 0; i < matrix->columns(); i++) { - math::nd4j_swap(matrix->t(theFirst, i), matrix->t(theSecond, i)); + math::nd4j_swap(matrix->r(theFirst, i), matrix->r(theSecond, i)); } } BUILD_SINGLE_TEMPLATE(template void swapRows_, (NDArray* matrix, int theFirst, int theSecond), FLOAT_TYPES); @@ -71,12 +71,12 @@ namespace helpers { auto invertDiagonals = PRAGMA_THREADS_FOR { for (int i = start; i < stop; i += increment) - invertedMatrix->t(i, i) /= inputMatrix->t(i, i); + invertedMatrix->r(i, i) /= inputMatrix->t(i, i); }; auto invertSubDiagonals = PRAGMA_THREADS_FOR { for (int i = start; i < stop; i += increment) - invertedMatrix->t(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); + invertedMatrix->r(i, i - 1) -= (inputMatrix->t(i, i - 1) * invertedMatrix->t(i - 1, i - 1) / inputMatrix->t(i, i)); }; samediff::Threads::parallel_for(invertDiagonals, 0, n, 1); @@ -86,7 +86,7 @@ namespace helpers { for (int i = 1; i < n; i++) { for (int j = 0; j < i - 1 ; j++) for (int k = 0; k < i; k++) - invertedMatrix->t(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); + invertedMatrix->r(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); } } @@ -108,13 +108,13 @@ namespace helpers { auto invertDiagonals = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i += increment) - invertedMatrix->t(i, i) /= inputMatrix->t(i, i); + invertedMatrix->r(i, i) /= inputMatrix->t(i, i); }; 
//PRAGMA_OMP_PARALLEL_FOR_IF(n > Environment::getInstance()->elementwiseThreshold()) auto invertUpDiagonals = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i += increment) - invertedMatrix->t(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, i + 1) / + invertedMatrix->r(i, i + 1) -= (inputMatrix->t(i, i + 1) * invertedMatrix->t(i + 1, i + 1) / inputMatrix->t(i, i)); }; @@ -125,7 +125,7 @@ namespace helpers { for (auto i = n - 2; i >= 0; i--) { for (auto j = i + 2; j < n; j++) for (auto k = i; k < n; k++) - invertedMatrix->t(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); + invertedMatrix->r(i, j) -= ((invertedMatrix->t(k, j) * inputMatrix->t(i, k) / inputMatrix->t(i, i))); } } @@ -169,10 +169,10 @@ namespace helpers { swapCount++; for( int j = i + 1; j < rowNum; j++ ) { - compoundMatrix.t(j, i) /= compoundMatrix.t(i, i); + compoundMatrix.r(j, i) /= compoundMatrix.t(i, i); //PRAGMA_OMP_PARALLEL_FOR for( int k = i + 1; k < rowNum; k++ ) { - compoundMatrix.t(j, k) -= compoundMatrix.t(j, i) * compoundMatrix.t(i, k); + compoundMatrix.r(j, k) -= compoundMatrix.t(j, i) * compoundMatrix.t(i, k); } } } @@ -190,7 +190,7 @@ namespace helpers { for (auto i = 0; i < rowNum; i++) { for (auto j = 0; j < columnNum; j++) { if (permutationMatrix.t(i, j) != 0) { - permutaionVector.template t(i) = j; + permutaionVector.template r(i) = j; } } } @@ -268,7 +268,7 @@ namespace helpers { sum += compound->t(i,j) * compound->t(j,k); // Evaluating U(i, k) - compound->t(i, k) = input.t(i, k) - sum; + compound->r(i, k) = input.t(i, k) - sum; } // Lower Triangular @@ -279,7 +279,7 @@ namespace helpers { sum += compound->t(k,j) * compound->t(j, i); // Evaluating L(k, i) - compound->t(k, i) = (input.t(k, i) - sum) / compound->t(i,i); + compound->r(k, i) = (input.t(k, i) - sum) / compound->t(i,i); } } } @@ -412,12 +412,12 @@ template lowerMatrix.setIdentity(); // set up U to identity matrix for (int k = 1; k < n; k++) { // and then put all 
values under main diagonal on to it for (int j = 0; j < k; j++) - lowerMatrix.template t(k, j) = compound.template t(k, j); + lowerMatrix.template r(k, j) = compound.template t(k, j); } upperMatrix.setIdentity(); // set up U to identity matrix for (int k = 0; k < n; k++) { // and then put all values under main diagonal on to it for (int j = k; j < n; j++) - upperMatrix.template t(k, j) = compound.template e(k, j); + upperMatrix.template r(k, j) = compound.template t(k, j); } invertUpperMatrix(&upperMatrix, &matrix); @@ -426,7 +426,7 @@ template sd::MmulHelper::mmul(&matrix, &upperMatrix, &compound, 1.0, 0.0); sd::MmulHelper::mmul(&compound, &permutation, &matrix, 1.0, 0.0); for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { - output->t(k) = matrix.template t(row++); + output->r(k) = matrix.template t(row++); } } @@ -470,7 +470,7 @@ template invertLowerMatrix(&matrix, &lowerMatrix); for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { - output->t(k) = lowerMatrix.template t(row++); + output->r(k) = lowerMatrix.template t(row++); } } @@ -597,7 +597,7 @@ template for (Nd4jLong e = 0; e < totalCount; e++) { for (size_t i = 0; i < n; ++i) - output->t(e) += sd::math::nd4j_log(sd::math::nd4j_pow(matricies.at(e)->t(i, i), T(2))); + output->r(e) += sd::math::nd4j_log(sd::math::nd4j_pow(matricies.at(e)->t(i, i), T(2))); } return ND4J_STATUS_OK; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp index d748aa6b0..2a0c5af95 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp @@ -47,8 +47,8 @@ static void mergeMaxIndex_(const std::vector& inArrs, NDArray& o idx = static_cast(i); } } - // FIXME, use .r(e) - output.t(e) = static_cast(idx); + + output.r(e) = static_cast(idx); } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp index 1e96211b3..b0e1553e4 
100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp @@ -68,7 +68,7 @@ namespace helpers { beta != nullptr ? copyBeta->t(e) * u : u); } else { - output->t(pos + e) = math::nd4j_igamma(copyAlpha->t(e), + output->r(pos + e) = math::nd4j_igamma(copyAlpha->t(e), beta != nullptr ? copyBeta->t(e) * u : u); } } @@ -121,7 +121,7 @@ namespace helpers { if (directOut) outputBuf[pos + e] = x; else - output->t(pos + e) = x; + output->r(pos + e) = x; } } } @@ -146,7 +146,7 @@ namespace helpers { else { PRAGMA_OMP_PARALLEL_FOR for (Nd4jLong i = 0; i < output->lengthOf(); i++) { - output->t(i) = rng.relativeT(i, minVal, maxVal); + output->r(i) = rng.relativeT(i, minVal, maxVal); } } } @@ -159,12 +159,12 @@ namespace helpers { // methods: gumbel trick + softmax + argmax template void fillRandomMultiNomial_(LaunchContext* context, graph::RandomGenerator& rng, NDArray& input, NDArray& output, const Nd4jLong numOfSamples, const int dimC) { - + const Tx* x = input.bufferAsT(); Tz* z = output.bufferAsT(); - + Tx minVal = DataTypeUtils::min(); - Tx maxVal = 1.0; + Tx maxVal = 1.0; auto dimA = (0 == dimC) ? 
1 : 0; const Nd4jLong batchValue = output.sizeAt(dimC); @@ -178,7 +178,7 @@ namespace helpers { auto func = PRAGMA_THREADS_FOR_2D{ for (auto nBatchIndex = start_x; nBatchIndex < stop_x; nBatchIndex += inc_x) { for (auto nSampleIndexInBatch = start_y; nSampleIndexInBatch < stop_y; nSampleIndexInBatch += inc_y) { - + const Tx* xTad = x + (nBatchIndex * xDimCstride); Tz* zTad = z + (nBatchIndex * zDimCstride); Tz& arg = zTad[nSampleIndexInBatch * zDimAstride]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp index 2e336da23..a7f40899a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp @@ -54,8 +54,8 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& T t0 = input.t(i); T t1 = input.t(r); //math::nd4j_swap(input(i), input(r)); - input.t(i) = t1; - input.t(r) = t0; + input.r(i) = t1; + input.r(r) = t0; } } else { @@ -66,11 +66,11 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& // FIXME: parallelism!! 
for(int i = firstDim-1; i > 0; --i) { int r = rng.relativeInt(i) % i; - output.t(i) = input.t(indices[r]); + output.r(i) = input.t(indices[r]); if(i == r) continue; - output.t(r) = input.t(indices[i]); + output.r(r) = input.t(indices[i]); math::nd4j_swap(indices[i], indices[r]); } rng.rewindH(firstDim-1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp index e57264e66..50ff79679 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/segment.cpp @@ -46,7 +46,7 @@ namespace helpers { idx = indices->e(e); val = input->t(e); } - output->t(idx) = val; + output->r(idx) = val; } } else { @@ -65,7 +65,7 @@ namespace helpers { if (indices->e(i) == idx) { for (Nd4jLong e = 0; e < maxT->lengthOf(); e++) { - maxT->t(e) = sd::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e)); + maxT->r(e) = sd::math::nd4j_max(maxT->t(e), listOfTensors.at(i)->t(e)); } } else { @@ -96,7 +96,7 @@ namespace helpers { idx = indices->e(e); val = input->t(e); } - output->t(idx) = val; + output->r(idx) = val; } } else { @@ -417,7 +417,7 @@ namespace helpers { for (size_t idx = 1; idx < fi->second.size(); ++idx) { val = sd::math::nd4j_min(val, input->t(fi->second.at(idx))); } - output->t(fi->first) = val; + output->r(fi->first) = val; } } else { @@ -436,7 +436,7 @@ namespace helpers { auto minT = listOfTensors.at(fi->second.at(idx)); for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) { - outputT->t(e) = sd::math::nd4j_min(minT->t(e), outputT->t(e)); + outputT->r(e) = sd::math::nd4j_min(minT->t(e), outputT->t(e)); } } //outputT->assign(maxT); @@ -890,7 +890,7 @@ namespace helpers { for (auto e = start; e < stop; e++) { auto classNum = indices->e(e); if (sd::math::nd4j_abs(tempRes.t(classNum) - input->t(e)) < 1.e-6) - output->t(e) = gradOut->t(classNum); + output->r(e) = gradOut->t(classNum); } }; @@ -913,7 +913,7 @@ namespace helpers { for (Nd4jLong e = 0; e < 
current->lengthOf(); e++) { if (sd::math::nd4j_abs(listOfBPTensors.at(classNum)->t(e) - current->t(e)) < 1.e-6) - currentOut->t(e) = currentGradOut->t(e); + currentOut->r(e) = currentGradOut->t(e); } } //}; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp index 8e25c4690..3c8ce573e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/sequence_mask.cpp @@ -31,7 +31,7 @@ namespace helpers { for (auto i = start_x; i < stop_x; i += inc_x) for (auto k = start_y; k < stop_y; k += inc_y) if (i < input->t(k)) - output->t(k * maxIndex + i) = B(true); //, T(1.0f)); + output->r(k * maxIndex + i) = B(true); //, T(1.0f)); }; samediff::Threads::parallel_for(func, 0, maxIndex, 1, 0, input->lengthOf(), 1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp index 9a06975aa..a0034bb5d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/solve.cpp @@ -43,7 +43,7 @@ namespace helpers { for (auto batch = start; batch < stop; batch++) { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = 0; c < r; c++) { - math::nd4j_swap(outputPart[batch]->t(r, c) , outputPart[batch]->t(c, r)); + math::nd4j_swap(outputPart[batch]->r(r, c) , outputPart[batch]->r(c, r)); } } } @@ -67,7 +67,7 @@ namespace helpers { for (auto batch = 0; batch < permutationsPart.size(); ++batch) { for (Nd4jLong row = 0; row < PPart[batch]->rows(); ++row) { - PPart[batch]->t(row, permutationsPart[batch]->t(row)) = T(1.f); + PPart[batch]->r(row, permutationsPart[batch]->t(row)) = T(1.f); } } @@ -78,7 +78,7 @@ namespace helpers { ResultSet leftLowerPart = leftLower.allTensorsAlongDimension({-2, -1}); for (auto i = 0; i < leftLowerPart.size(); i++) { for (Nd4jLong r = 0; r < leftLowerPart[i]->rows(); r++) - leftLowerPart[i]->t(r,r) = 
(T)1.f; + leftLowerPart[i]->r(r,r) = (T)1.f; } // stage 2: triangularSolveFunctor for Lower with given b helpers::triangularSolveFunctor(context, &leftLower, &rightPermuted, true, false, &rightOutput); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp b/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp index c4f99af3f..6910960ef 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/svd.cpp @@ -27,911 +27,6 @@ namespace sd { namespace ops { namespace helpers { - -////////////////////////////////////////////////////////////////////////// -template -SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV ) { - - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !"); - - const int rows = matrix.sizeAt(0); - const int cols = matrix.sizeAt(1); - - if(cols > rows) { - - _transp = true; - _diagSize = rows; - } - else { - - _transp = false; - _diagSize = cols; - } - - _switchSize = switchSize; - _calcU = calcU; - _calcV = calcV; - _fullUV = fullUV; - - if (_transp) - math::nd4j_swap(_calcU, _calcV); - - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.); - - if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); - else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); - - if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); - } - - evalData(matrix); -} - -////////////////////////////////////////////////////////////////////////// -template -SVD::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const 
bool fullUV, const char t) { - - if(matrix.rankOf() != 2 || matrix.isScalar()) - throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !"); - - const int rows = matrix.sizeAt(0); - const int cols = matrix.sizeAt(1); - - if(cols > rows) { - - _transp = true; - _diagSize = rows; - } - else { - - _transp = false; - _diagSize = cols; - } - - _switchSize = switchSize; - _calcU = calcU; - _calcV = calcV; - _fullUV = fullUV; - - if (_transp) - math::nd4j_swap(_calcU, _calcV); - - _s = NDArrayFactory::create(matrix.ordering(), {_diagSize, 1}, matrix.getContext()); - _m = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext()); - _m.assign(0.f); - - if (_calcU) - _u = NDArrayFactory::create(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext()); - else - _u = NDArrayFactory::create(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext()); - _u.assign(0.); - - if (_calcV) { - _v = NDArrayFactory::create(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext()); - _v.assign(0.); - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::deflation1(int col1, int shift, int ind, int size) { - - if(ind <= 0) - throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !"); - - int first = col1 + shift; - T cos = _m.e(first, first); - T sin = _m.e(first+ind, first); - T denom = math::nd4j_sqrt(cos*cos + sin*sin); - - if (denom == (T)0.) 
{ - - _m.p(first+ind, first+ind, 0.f); - return; - } - - cos /= denom; - sin /= denom; - - _m.p(first,first, denom); - _m.p(first+ind, first, 0.f); - _m.p(first+ind, first+ind, 0.f); - - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0, 0, cos); - rotation.p(0, 1, -sin); - rotation.p(1, 0, sin); - rotation.p(1, 1, cos); - - if (_calcU) { - auto temp = _u({col1,col1+size+1, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1, col1+ind, temp, rotation); - } - else - JacobiSVD::mulRotationOnRight(col1, col1+ind, _u, rotation); -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, int ind2, int size) { - - if(ind1 >= ind2) - throw std::runtime_error("ops::helpers::SVD::deflation2 method: input intes must satisfy condition ind1 < ind2 !"); - - if(size <= 0) - throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !"); - - T cos = _m.e(col1M+ind1, col1M); - T sin = _m.e(col1M+ind2, col1M); - T denom = math::nd4j_sqrt(cos*cos + sin*sin); - - if (denom == (T)0.) 
{ - - _m.p(col1M + ind1, col1M + ind1, _m.e(col1M + ind2, col1M + ind2)); - return; - } - - cos /= denom; - sin /= denom; - _m.p(col1M + ind1, col1M, denom); - _m.p(col1M + ind2, col1M + ind2, _m.e(col1M + ind1, col1M + ind1)); - _m.p(col1M + ind2, col1M, 0.f); - - auto rotation = NDArrayFactory::create(_m.ordering(), {2, 2}, _m.getContext()); - rotation.p(0,0, cos); - rotation.p(1,1, cos); - - rotation.p(0,1, -sin); - rotation.p(1,0, sin); - - if (_calcU) { - auto temp = _u({col1U,col1U+size+1, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1U+ind1, col1U+ind2, temp, rotation); - } - else - JacobiSVD::mulRotationOnRight(col1U+ind1, col1U+ind2, _u, rotation); - - if (_calcV) { - auto temp = _v({row1W,row1W+size, 0,0}, true); - JacobiSVD::mulRotationOnRight(col1W+ind1, col1W+ind2, temp, rotation); - } -} - -////////////////////////////////////////////////////////////////////////// -// has effect on block from (col1+shift, col1+shift) to (col2+shift, col2+shift) inclusively -template -void SVD::deflation(int col1, int col2, int ind, int row1W, int col1W, int shift) -{ - - const int len = col2 + 1 - col1; - - auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true)); - - auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c'); - - const T almostZero = DataTypeUtils::min(); - T maxElem; - if(len == 1) - maxElem = math::nd4j_abs(diagInterval.template e(0)); - else - maxElem = diagInterval({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e(0); - T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e(0); - - T eps = math::nd4j_max(almostZero, DataTypeUtils::eps() * maxElem); - T epsBig = (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(maxElem0, maxElem); - - if(diagInterval.template e(0) < epsBig) - diagInterval.p(Nd4jLong(0), epsBig); - - for(int i=1; i < len; ++i) - if(math::nd4j_abs(colVec0->template e(i)) < eps) - colVec0->p(i, 0.f); - - for(int i=1; i < len; i++) - if(diagInterval.template e(i) < epsBig) { - deflation1(col1, shift, i, len); - for(int i = 0; i < len; ++i) - diagInterval.p(i, _m.e(col1+shift+i,col1+shift+i)); - } - - { - - bool totDefl = true; - for(int i=1; i < len; i++) - if(colVec0->template e(i) >= almostZero) { - totDefl = false; - break; - } - - int* permut = nullptr; - ALLOCATE(permut, _m.getContext()->getWorkspace(), 3*_diagSize, int); - { - permut[0] = 0; - int p = 1; - - for(int i=1; i(diagInterval.template e(i)) < almostZero) - permut[p++] = i; - - int k = 1, m = ind+1; - - for( ; p < len; ++p) { - if(k > ind) - permut[p] = m++; - else if(m >= len) - permut[p] = k++; - else if(diagInterval.template e(k) < diagInterval.template e(m)) - permut[p] = m++; - else - permut[p] = k++; - } - } - - if(totDefl) { - for(int i=1; i(diagInterval.template e(ki)) < almostZero || diagInterval.template e(0) < diagInterval.template e(ki)) - permut[i-1] = permut[i]; - else { - permut[i-1] = 0; - break; - } - } - } - - int *tInd = permut + len; - int *tCol = permut + 2*len; - - for(int m = 0; m < len; m++) { - tCol[m] = m; - tInd[m] = m; - } - - for(int i = totDefl ? 0 : 1; i < len; i++) { - - const int ki = permut[len - (totDefl ? 
i+1 : i)]; - const int jac = tCol[ki]; - - T _e0 = diagInterval.template e(jac); - //math::nd4j_swap(diagInterval)(i), (*diagInterval)(jac)); - diagInterval.p(jac, diagInterval.template e(i)); - diagInterval.p(i, _e0); - - if(i!=0 && jac!=0) { - _e0 = colVec0->template e(jac); - //math::nd4j_swap((*colVec0)(i), (*colVec0)(jac)); - colVec0->p(jac, colVec0->template e(i)); - colVec0->p(i, _e0); - } - - if (_calcU) { - auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true); - auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - else { - auto temp1 = _u({0,2, col1+i, col1+i+1}, true); - auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - - if(_calcV) { - auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true); - auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - - const int tI = tInd[i]; - tCol[tI] = jac; - tCol[ki] = i; - tInd[jac] = tI; - tInd[i] = ki; - } - - RELEASE(permut, _m.getContext()); - } - - { - int i = len-1; - - while(i > 0 && (math::nd4j_abs(diagInterval.template e(i)) < almostZero || math::nd4j_abs(colVec0->template e(i)) < almostZero)) - --i; - - for(; i > 1; --i) { - if( (diagInterval.template e(i) - diagInterval.template e(i-1)) < DataTypeUtils::eps()*maxElem ) { - if (math::nd4j_abs(diagInterval.template e(i) - diagInterval.template e(i-1)) >= epsBig) - throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !"); - deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len); - } - } - } - - delete colVec0; -} - - -////////////////////////////////////////////////////////////////////////// -template -T SVD::secularEq(const T diff, const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& diagShifted, const T shift) { - - auto len = 
permut.lengthOf(); - T res = 1.; - T item; - for(int i=0; i(i); - item = col0.e(j) / ((diagShifted.e(j) - diff) * (diag.e(j) + shift + diff)); - res += item * col0.e(j); - } - - return res; -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) { - - auto len = col0.lengthOf(); - auto curLen = len; - - while(curLen > 1 && col0.e(curLen-1) == (T)0.f) - --curLen; - - for (int k = 0; k < len; ++k) { - - if (col0.e(k) == (T)0.f || curLen==1) { - - singVals.p(k, k==0 ? col0.e(0) : diag.e(k)); - mus.p(k, 0.f); - shifts.p(k, k==0 ? col0.e(0) : diag.e(k)); - continue; - } - - T left = diag.e(k); - T right; - - if(k==curLen-1) - right = diag.e(curLen-1) + col0.reduceNumber(reduce::Norm2).e(0); - else { - - int l = k+1; - while(col0.e(l) == (T)0.f) { - ++l; - if(l >= curLen) - throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !"); - } - - right = diag.e(l); - } - - T mid = left + (right - left) / (T)2.; - T fMid = secularEq(mid, col0, diag, permut, diag, 0.); - T shift = (k == curLen-1 || fMid > (T)0.) ? left : right; - - auto diagShifted = diag - shift; - - T muPrev, muCur; - if (shift == left) { - muPrev = (right - left) * 0.1; - if (k == curLen-1) - muCur = right - left; - else - muCur = (right - left) * 0.5; - } - else { - muPrev = -(right - left) * 0.1; - muCur = -(right - left) * 0.5; - } - - T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift); - T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift); - - if (math::nd4j_abs(fPrev) < math::nd4j_abs(fCur)) { - math::nd4j_swap(fPrev, fCur); - math::nd4j_swap(muPrev, muCur); - } - - bool useBisection = fPrev * fCur > (T)0.; - while (fCur != (T).0 && - math::nd4j_abs(muCur - muPrev) > (T)8. 
* DataTypeUtils::eps() * math::nd4j_max(math::nd4j_abs(muCur), math::nd4j_abs(muPrev)) - && math::nd4j_abs(fCur - fPrev) > DataTypeUtils::eps() && !useBisection) { - - T a = (fCur - fPrev) / ((T)1./muCur - (T)1./muPrev); - T jac = fCur - a / muCur; - T muZero = -a/jac; - T fZero = secularEq(muZero, col0, diag, permut, diagShifted, shift); - - muPrev = muCur; - fPrev = fCur; - muCur = muZero; - fCur = fZero; - - if (shift == left && (muCur < (T)0. || muCur > right - left)) - useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > (T)0.)) - useBisection = true; - if (math::nd4j_abs(fCur) > math::nd4j_abs(fPrev) && math::nd4j_abs(fCur - fPrev) > (T)16. * DataTypeUtils::eps()) - useBisection = true; - } - - - if (useBisection) { - - T leftShifted, rightShifted; - if (shift == left) { - leftShifted = DataTypeUtils::min(); - rightShifted = (k==curLen-1) ? right : ((right - left) * (T)0.6); - } - else { - - leftShifted = -(right - left) * (T)0.6; - rightShifted = -DataTypeUtils::min(); - } - - T fLeft = secularEq(leftShifted, col0, diag, permut, diagShifted, shift); - T fRight = secularEq(rightShifted, col0, diag, permut, diagShifted, shift); - // if(fLeft * fRight >= (T)0.) - // throw "ops::helpers::SVD::calcSingVals method: fLeft * fRight >= (T)0. !"; - - while (rightShifted - leftShifted > (T)2.f * DataTypeUtils::eps() * math::nd4j_max(math::nd4j_abs(leftShifted), math::nd4j_abs(rightShifted))) { - - T midShifted = (leftShifted + rightShifted) / (T)2.; - fMid = secularEq(midShifted, col0, diag, permut, diagShifted, shift); - if (fLeft * fMid < (T)0.) 
- rightShifted = midShifted; - else { - leftShifted = midShifted; - fLeft = fMid; - } - } - muCur = (leftShifted + rightShifted) / (T)2.; - } - singVals.p(k, shift + muCur); - shifts.p(k, shift); - mus.p(k, muCur); - } - -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) { - - int n = col0.lengthOf(); - int m = permut.lengthOf(); - if(m==0) { - zhat.assign(0.); - return; - } - - int last = permut.e(m-1); - - for (int k = 0; k < n; ++k) { - - if (col0.e(k) == (T)0.f) - zhat.p(k, (T)0.f); - else { - T dk = diag.e(k); - T prod = (singVals.e(last) + dk) * (mus.e(last) + (shifts.e(last) - dk)); - - for(int l = 0; l(l); - if(i!=k) { - int j = i(l-1); - prod *= ((singVals.e(j)+dk) / ((diag.e(i)+dk))) * ((mus.e(j)+(shifts.e(j)-dk)) / ((diag.e(i)-dk))); - } - } - T tmp = math::nd4j_sqrt(prod); - zhat.p(k, col0.e(k) > (T)0.f ? 
tmp : -tmp); - } - } -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArray& perm, const NDArray& singVals, - const NDArray& shifts, const NDArray& mus, NDArray& U, NDArray& V) { - - int n = zhat.lengthOf(); - int m = perm.lengthOf(); - - for (int k = 0; k < n; ++k) { - - auto colU = new NDArray(U({0,0, k,k+1}, true)); - *colU = 0.; - NDArray* colV = nullptr; - - if (_calcV) { - colV = new NDArray(V({0,0, k,k+1}, true)); - *colV = 0.; - } - - if (zhat.e(k) == (T)0.f) { - colU->p(k, 1.f); - - if (_calcV) - colV->p(k, 1.f); - } - else { - - for(int l = 0; l < m; ++l) { - int i = perm.e(l); - U.p(i,k, zhat.e(i)/(((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); - } - U.p(n,k, 0.f); - *colU /= colU->reduceNumber(reduce::Norm2); - - if (_calcV) { - - for(int l = 1; l < m; ++l){ - int i = perm.e(l); - V.p(i,k, diag.e(i) * zhat.e(i) / (((diag.e(i) - shifts.e(k)) - mus.e(k)) )/( (diag.e(i) + singVals.e(k)))); - } - V.p(0,k, -1.f); - *colV /= colV->reduceNumber(reduce::Norm2); - } - } - delete colU; - if (_calcV) - delete colV; - } - - auto colU = U({0,0, n,n+1}, true); - colU = 0.; - colU.p(n, 1.); -} - - -////////////////////////////////////////////////////////////////////////// -template -void SVD::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDArray& V) { - - const T almostZero = DataTypeUtils::min(); - auto col0 = _m({col1, col1+size, col1, col1+1}, true); - auto diag = static_cast(_m({col1, col1+size, col1, col1+size}, true).diagonal('c')); - - diag.p(Nd4jLong(0), T(0)); - singVals = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - U = NDArrayFactory::create(_u.ordering(), {size+1, size+1}, _u.getContext()); - if (_calcV) - V = NDArrayFactory::create(_v.ordering(), {size, size}, _v.getContext()); - - int curSize = size; - while(curSize > 1 && diag.template e(curSize-1) == (T)0.f) - 
--curSize; - - int m = 0; - std::vector indices; - for(int k = 0; k < curSize; ++k) - if(math::nd4j_abs(col0.template e(k)) > almostZero) - indices.push_back((T)k); - - auto permut = NDArrayFactory::create(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext()); - auto shifts = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto mus = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - auto zhat = NDArrayFactory::create(_m.ordering(), {size, 1}, _m.getContext()); - - calcSingVals(col0, diag, permut, singVals, shifts, mus); - perturb(col0, diag, permut, singVals, shifts, mus, zhat); - calcSingVecs(zhat, diag, permut, singVals, shifts, mus, U, V); - - for(int i=0; i(i) > singVals.e(i+1)) { - T _e0 = singVals.e(i); - T _e1 = singVals.e(i+1); - //math::nd4j_swap(singVals(i),singVals(i+1)); - singVals.p(i, _e1); - singVals.p(i+1, _e0); - - auto temp1 = U({0,0, i,i+1}, true); - auto temp2 = U({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - - if(_calcV) { - auto temp1 = V({0,0, i,i+1}, true); - auto temp2 = V({0,0, i+1,i+2}, true); - auto temp3 = temp1; - temp1.assign(temp2); - temp2.assign(temp3); - } - } - } - - auto temp1 = singVals({0,curSize, 0,0}, true); - for (int e = 0; e < curSize / 2; ++e) { - T tmp = temp1.e(e); - temp1.p(e, temp1.e(curSize-1-e)); - temp1.p(curSize-1-e, tmp); - } - - auto temp2 = U({0,0, 0,curSize}, true); - for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); - } - - if (_calcV) { - auto temp2 = V({0,0, 0,curSize}, true); - for(int i = 0; i < curSize/2; ++i) { - auto temp3 = temp2({0,0, i,i+1}, true); - auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true); - auto temp5 = temp3; - temp3.assign(temp4); - temp4.assign(temp5); - } - } -} - - 
-////////////////////////////////////////////////////////////////////////// -template -void SVD::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shift) { - - // requires rows = cols + 1; - const int n = col2 - col1 + 1; - const int k = n/2; - const T almostZero = DataTypeUtils::min(); - T alphaK; - T betaK; - T r0; - T lambda, phi, c0, s0; - auto l = NDArrayFactory::create(_u.ordering(), {1, k}, _u.getContext()); - auto f = NDArrayFactory::create(_u.ordering(), {1, n-k-1}, _u.getContext()); - - if(n < _switchSize) { - - JacobiSVD jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV); - - if (_calcU) { - auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true); - temp.assign(jac._u); - } - else { - auto temp1 = _u({0,1, col1,col1+n+1}, true); - temp1.assign(jac._u({0,1, 0,0}, true)); - auto temp2 = _u({1,2, col1,col1+n+1}, true); - temp2.assign(jac._u({n,n+1, 0,0}, true)); - } - - if (_calcV) { - auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true); - temp.assign(jac._v); - } - - auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true); - temp.assign(0.); - auto diag = _m.diagonal('c'); - diag({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true)); - - return; - } - - alphaK = _m.e(col1 + k, col1 + k); - betaK = _m.e(col1 + k + 1, col1 + k); - - DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift); - DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1); - - if (_calcU) { - lambda = _u.e(col1 + k, col1 + k); - phi = _u.e(col1 + k + 1, col2 + 1); - } - else { - lambda = _u.e(1, col1 + k); - phi = _u.e(0, col2 + 1); - } - - r0 = math::nd4j_sqrt((math::nd4j_abs(alphaK * lambda) * math::nd4j_abs(alphaK * lambda)) + math::nd4j_abs(betaK * phi) * math::nd4j_abs(betaK * phi)); - - if(_calcU) { - l.assign(_u({col1+k, col1+k+1, col1,col1+k}, true)); - f.assign(_u({col1+k+1,col1+k+2, col1+k+1,col1+n}, true)); - } - else { - l.assign(_u({1,2, col1, col1+k}, true)); - f.assign(_u({0,1, 
col1+k+1, col1+n}, true)); - } - - // UofSVD.printIndexedBuffer(); - // VofSVD.printIndexedBuffer(); - // singVals.printIndexedBuffer(); - // printf("!! \n"); - - if (_calcV) - _v.p(row1W+k, col1W, 1.f); - - if (r0 < almostZero){ - c0 = 1.; - s0 = 0.; - } - else { - c0 = alphaK * lambda / r0; - s0 = betaK * phi / r0; - } - - if (_calcU) { - - auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true); - NDArray q1(temp); - - for (int i = col1 + k - 1; i >= col1; --i) { - auto temp = _u({col1,col1+k+1, i+1,i+2}, true); - temp.assign(_u({col1, col1+k+1, i, i+1}, true)); - } - - _u({col1,col1+k+1, col1,col1+1}, true).assign(q1 * c0); - _u({col1,col1+k+1, col2+1,col2+2}, true).assign(q1 * (-s0)); - _u({col1+k+1,col1+n+1, col1, col1+1}, true).assign(static_cast(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true)) * s0); - _u({col1+k+1,col1+n+1, col2+1,col2+2}, true) *= c0; - } - else { - - T q1 = _u.e(0, col1 + k); - - for (int i = col1 + k - 1; i >= col1; --i) - _u.p(0, i+1, _u.e(0, i)); - - _u.p(0, col1, q1 * c0); - _u.p(0, col2+1, -q1*s0); - _u.p(1, col1, _u.e(1, col2+1) * s0); - _u.p(1, col2 + 1, _u.e(1, col2 + 1) * c0); - _u({1,2, col1+1, col1+k+1}, true) = 0.f; - _u({0,1, col1+k+1, col1+n}, true) = 0.f; - } - - _m.p(col1 + shift, col1 + shift, r0); - auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true); - temp1.assign(l*alphaK); - auto temp2 = _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true); - temp2.assign(f*betaK); - - deflation(col1, col2, k, row1W, col1W, shift); - - NDArray UofSVD, VofSVD, singVals; - calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD); - - if(_calcU) { - auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); - } - else { - auto pTemp = _u({0,0, col1,col1+n+1}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, UofSVD)); - } - - if (_calcV) { - auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true); - auto temp = pTemp; - pTemp.assign(mmul(temp, 
VofSVD)); - } - - auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true); - blockM = 0.f; - auto diag = blockM.diagonal('c'); - diag.assign(singVals); -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDArray& U, const NDArray& V) { - - if (_calcU) { - - int colsU = _fullUV ? hhU.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_u.ordering(), {hhU.rows(), colsU}, _u.getContext()); - temp1.setIdentity(); - _u = temp1; - - auto temp2 = _u({0,_diagSize, 0,_diagSize}, true); - temp2.assign(V({0,_diagSize, 0,_diagSize}, true)); - const_cast(hhU).mulLeft(_u); - } - - if (_calcV) { - - int colsV = _fullUV ? hhV.rows() : _diagSize; - auto temp1 = NDArrayFactory::create(_v.ordering(), {hhV.rows(), colsV}, _v.getContext()); - temp1.setIdentity(); - _v = temp1; - - auto temp2 = _v({0,_diagSize, 0,_diagSize}, true); - temp2.assign(U({0,_diagSize, 0,_diagSize}, true)); - const_cast(hhV).mulLeft(_v); - } -} - -////////////////////////////////////////////////////////////////////////// -template -void SVD::evalData(const NDArray& matrix) { - - const T almostZero = DataTypeUtils::min(); - - if(matrix.sizeAt(1) < _switchSize) { - - JacobiSVD jac(matrix, _calcU, _calcV, _fullUV); - - if(_calcU) - _u = jac._u; - if(_calcV) - _v = jac._v; - - _s.assign(jac._s); - - return; - } - - T scale = matrix.reduceNumber(reduce::AMax).e(0); - - if(scale == (T)0.) 
- scale = 1.; - - NDArray copy; - if(_transp) - copy = matrix.transpose(); - else - copy = matrix / scale; - - BiDiagonalUp biDiag(copy); - - _u = 0.; - _v = 0.; - - auto temp1 = biDiag._HHbidiag.transpose(); - auto temp2 = _m({0,_diagSize, 0,0}, true); - temp2.assign(temp1); - - auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true); - temp3.assign(0.); - - DivideAndConquer(0, _diagSize - 1, 0, 0, 0); - - for (int i = 0; i < _diagSize; ++i) { - T a = math::nd4j_abs(_m.e(i, i)); - _s.p(i, a * scale); - if (a < almostZero) { - auto temp = _s({i+1,_diagSize, 0,0}, true); - temp.assign(0.); - break; - } - else if (i == _diagSize-1) - break; - } - - if(_transp) - exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u); - else - exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v); -} - - -BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT SVD,,FLOAT_TYPES); - - ////////////////////////////////////////////////////////////////////////// // svd operation, this function is not method of SVD class, it is standalone function template @@ -972,9 +67,10 @@ static void svd_(const NDArray* x, const std::vector& outArrs, const b } } - void svd(sd::LaunchContext * context, const NDArray* x, const std::vector& outArrs, const bool fullUV, const bool calcUV, const int switchNum) { - BUILD_SINGLE_SELECTOR(x->dataType(), svd_, (x, outArrs, fullUV, calcUV, switchNum), FLOAT_TYPES); - } +////////////////////////////////////////////////////////////////////////// +void svd(sd::LaunchContext * context, const NDArray* x, const std::vector& outArrs, const bool fullUV, const bool calcUV, const int switchNum) { + BUILD_SINGLE_SELECTOR(x->dataType(), svd_, (x, outArrs, fullUV, calcUV, switchNum), FLOAT_TYPES); +} } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp index fdab43261..65edeb71b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp +++ 
b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp @@ -73,8 +73,8 @@ namespace helpers { NDArray sortedVals = NDArrayFactory::create('c', {k}, input->getContext()); NDArray topIndices = NDArrayFactory::create('c', {k}, input->getContext()); for (uint pos = 0; pos < k; ++pos) { - topIndices.t(pos) = pos; - topValues.t(pos) = trial.t(pos); + topIndices.r(pos) = pos; + topValues.r(pos) = trial.t(pos); } //std::vector sortedVals(topValues); sortedVals.assign(topValues);// = NDArrayFactory::create('c', {k}); @@ -93,9 +93,9 @@ namespace helpers { T* topBegin = reinterpret_cast(topValues.buffer()); T* topEnd = topBegin + k; auto exchangePos = std::distance(topBegin, std::find(topBegin, topEnd, sortedVals.t(0))); - topValues.t(exchangePos) = val; //*exchangeIt = val; - topIndices.t(exchangePos) = i; - sortedVals.t(0) = val; // suppress in sorted + topValues.r(exchangePos) = val; //*exchangeIt = val; + topIndices.r(exchangePos) = i; + sortedVals.r(0) = val; // suppress in sorted //std::sort(sortedVals.begin(), sortedVals.end()); // sorted in ascending order SpecialMethods::sortGeneric(sortedVals.buffer(), sortedVals.shapeInfo(), false); } @@ -107,7 +107,7 @@ namespace helpers { for (Nd4jLong j = 0; j < width; j++) for (uint pos = 0; pos < k; ++pos) if (topValues.t(pos) == trial.t(j)) - topIndices.t(pos) = j; + topIndices.r(pos) = j; } else { // else sort by indices std::map sortValsMap; @@ -121,8 +121,8 @@ namespace helpers { //}); Nd4jLong e = 0; for (auto it = sortValsMap.begin(); it != sortValsMap.end(); ++it, e++) { - topIndices.t(e) = it->first; - topValues.t(e) = it->second; + topIndices.r(e) = it->first; + topValues.r(e) = it->second; } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp index bcf406392..86847da16 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triangular_solve.cpp @@ -39,17 
+39,17 @@ namespace helpers { * * */ template - static void lowerTriangularSolve(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) { + static void lowerTriangularSolve(sd::LaunchContext * context, NDArray const * leftInput, NDArray const* rightInput, bool const unitsOnDiag, NDArray* output) { auto rows = leftInput->rows(); auto cols = rightInput->columns(); - //output->t(0,0) = rightInput->t(0,0) / leftInput->t(0,0); + //output->r(0,0) = rightInput->t(0,0) / leftInput->t(0,0); for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong j = 0; j < cols; j++) { auto sum = rightInput->t(r, j); for (Nd4jLong c = 0; c < r; c++) { sum -= leftInput->t(r, c) * output->t(c, j); } - output->t(r, j) = sum / leftInput->t(r, r); + output->r(r, j) = unitsOnDiag?sum: sum / leftInput->t(r, r); } } } @@ -69,7 +69,7 @@ namespace helpers { * */ template - static void upperTriangularSolve(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) { + static void upperTriangularSolve(sd::LaunchContext* context, NDArray const* leftInput, NDArray const* rightInput, bool const unitsOnDiag, NDArray* output) { auto rows = leftInput->rows(); auto cols = rightInput->columns(); for (Nd4jLong r = rows; r > 0; r--) { @@ -78,11 +78,31 @@ namespace helpers { for (Nd4jLong c = r; c < rows; c++) { sum -= leftInput->t(r - 1, c) * output->t(c, j); } - output->t(r - 1, j) = sum / leftInput->t(r - 1, r - 1); + output->r(r - 1, j) = unitsOnDiag? 
sum : sum / leftInput->t(r - 1, r - 1); } } } + /// triangularSolve2D - 2D implementation of triangularSolveFunctor + /// \tparam T - type of NDArray output + /// \param context - launch context pointer + /// \param leftInput - T matrix of equation Tx = b + /// \param rightInput - b vector of equation Tx = b + /// \param lower - lower or upper triangular matrix + /// \param unitsOnDiag - solve for case when only units (1.0) on diagonal is assumed + /// \param output - output vector (x on equation Tx = b) + /// + template + void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output) { + if (lower) { + lowerTriangularSolve(context, &leftInput, &rightInput, unitsOnDiag, &output); + } + else { + upperTriangularSolve(context, &leftInput, &rightInput, unitsOnDiag, &output); + } + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); + template static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { auto leftPart = leftInput->allTensorsAlongDimension({-2, -1}); @@ -92,9 +112,9 @@ namespace helpers { auto batchLoop = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { if (lower) { - lowerTriangularSolve(context, leftPart[i], rightPart[i], adjoint, outputPart[i]); + lowerTriangularSolve(context, leftPart[i], rightPart[i], false, outputPart[i]); } else { - upperTriangularSolve(context, leftPart[i], rightPart[i], adjoint, outputPart[i]); + upperTriangularSolve(context, leftPart[i], rightPart[i], false, outputPart[i]); } } }; @@ -116,13 +136,13 @@ namespace helpers { if (!lower) { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = 0; c <= r; c++) { - outputPart[batch]->t(r, c) = inputPart[batch]->t(c, r); + 
outputPart[batch]->r(r, c) = inputPart[batch]->t(c, r); } } } else { for (Nd4jLong r = 0; r < rows; r++) { for (Nd4jLong c = r; c < cols; c++) { - outputPart[batch]->t(r, c) = inputPart[batch]->t(c, r); + outputPart[batch]->r(r, c) = inputPart[batch]->t(c, r); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp b/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp index 4194e976c..eb2074865 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/triu.cpp @@ -38,7 +38,7 @@ static void triuBP_(sd::LaunchContext * context, const NDArray& input, const NDA auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { if (dOdI.t(i) != static_cast(0.f)) - dOdI.t(i) = static_cast(1.f); + dOdI.r(i) = static_cast(1.f); } }; samediff::Threads::parallel_for(func, 0, dLen); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu index c8f26de6f..6302262be 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu @@ -41,9 +41,9 @@ namespace sd { * * */ template - static __device__ void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, + static _CUDA_HD void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, - bool const adjoint, T* output, Nd4jLong const* outputShape, + bool const unitOnDiag, T* output, const Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = 0; r < rows; r++) { @@ -62,7 +62,7 @@ namespace sd { auto zcIndex = shape::getOffset(outputShape, posZ, 0); sum -= leftInput[xcIndex] * output[zcIndex]; } - output[zIndex] = sum / leftInput[xIndex]; + output[zIndex] = unitOnDiag?sum:sum / leftInput[xIndex]; } } } @@ -82,9 +82,9 @@ namespace sd { * */ template - static __device__ void 
upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, - T const* rightInput, Nd4jLong const* rightInputShape, bool const adjoint, T* output, - Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { + static _CUDA_HD void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, + T const* rightInput, Nd4jLong const* rightInputShape, bool const unitOnDiag, T* output, + const Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = rows; r > 0; r--) { for (auto j = 0; j < cols; j++) { @@ -101,16 +101,16 @@ namespace sd { auto xcIndex = shape::getOffset(leftInputShape, pos, 0); sum -= leftInput[xcIndex] * output[zcIndex]; } - output[zIndex] = sum / leftInput[xIndex]; + output[zIndex] = unitOnDiag?sum:sum / leftInput[xIndex]; } } } template static __global__ void triangularSolveKernel(T const* leftInput, Nd4jLong const* leftPartShape, - T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const adjoint, T* output, - Nd4jLong const* outputShape, Nd4jLong const* tadLeftShape, Nd4jLong const* tadLeftOffset, Nd4jLong const* tadRightShape, - Nd4jLong const* tadRightOffset, Nd4jLong const* tadOutputShape, Nd4jLong const* tadOutputOffset, Nd4jLong batchNum) { + T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const unitsOnDiag, T* output, + const Nd4jLong* outputShape, const Nd4jLong* tadLeftShape, const Nd4jLong* tadLeftOffset, const Nd4jLong* tadRightShape, + const Nd4jLong* tadRightOffset, const Nd4jLong* tadOutputShape, const Nd4jLong* tadOutputOffset, Nd4jLong batchNum) { __shared__ Nd4jLong rows; __shared__ Nd4jLong cols; @@ -130,16 +130,16 @@ namespace sd { auto pRightPart = rightInput + tadRightOffset[i]; auto pOutputPart = output + tadOutputOffset[i]; if (lower) { - lowerTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, adjoint, pOutputPart, tadOutputShape, rows, cols); + lowerTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, 
unitsOnDiag, pOutputPart, tadOutputShape, rows, cols); } else { - upperTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, adjoint, pOutputPart, tadOutputShape, rows, cols); + upperTriangularSolve(pLeftPart, tadLeftShape, pRightPart, tadRightShape, unitsOnDiag, pOutputPart, tadOutputShape, rows, cols); } } } template static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, - bool lower, bool adjoint, NDArray* output) { + bool lower, bool unitsOnDiag, NDArray* output) { NDArray::prepareSpecialUse({output}, {leftInput, rightInput}); auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1}); auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1}); @@ -150,7 +150,7 @@ namespace sd { T const* rightBuf = reinterpret_cast(rightInput->specialBuffer()); T* outputBuf = reinterpret_cast(output->specialBuffer()); triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->specialShapeInfo(), - rightBuf, rightInput->specialShapeInfo(), lower, adjoint, outputBuf, output->specialShapeInfo(), + rightBuf, rightInput->specialShapeInfo(), lower, unitsOnDiag, outputBuf, output->specialShapeInfo(), leftTads.specialShapeInfo(), leftTads.specialOffsets(), rightTads.specialShapeInfo(), rightTads.specialOffsets(), outputTads.specialShapeInfo(), outputTads.specialOffsets(), leftTads.numberOfTads()); @@ -161,8 +161,41 @@ namespace sd { } - int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { - BUILD_SINGLE_SELECTOR(leftInput->dataType(), return triangularSolveFunctor_, (context, leftInput, rightInput, lower, adjoint, output), FLOAT_NATIVE); + /// triangularSolve2D - 2D implementation of triangularSolveFunctor + /// \tparam T - type of NDArray output + /// \param context - launch context pointer + /// \param leftInput - T matrix of equation Tx 
= b + /// \param rightInput - b vector of equation Tx = b + /// \param lower - lower or upper triangular matrix + /// \param unitsOnDiag - solve for case when only units (1.0) on diagonal is assumed + /// \param output - output vector (x on equation Tx = b) + /// + template + void triangularSolve2D(sd::LaunchContext* context, const NDArray& leftInput, const NDArray& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output) { + + triangularSolveFunctor_(context, const_cast(&leftInput), const_cast(&rightInput), lower, unitsOnDiag, &output); + + // leftInput.syncToHost(); rightInput.syncToHost(); output.syncToHost(); + // T const* pLeftPart = (T const*)leftInput.getBuffer(); + // T const* pRightPart = (T const*)rightInput.getBuffer(); + // T* pOutputPart = (T*)output.buffer(); + // auto rows = leftInput.rows(); + // auto cols = leftInput.columns(); + // if (lower) { + // lowerTriangularSolve(pLeftPart, leftInput.shapeInfo(), pRightPart, rightInput.shapeInfo(), unitsOnDiag, pOutputPart, output.shapeInfo(), rows, cols); + // } else { + // upperTriangularSolve(pLeftPart, leftInput.shapeInfo(), pRightPart, rightInput.shapeInfo(), unitsOnDiag, pOutputPart, output.shapeInfo(), rows, cols); + // } + // output.syncToDevice(); + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); +// 
template void triangularSolve2D(sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output); + + int triangularSolveFunctor(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output) { + BUILD_SINGLE_SELECTOR(leftInput->dataType(), return triangularSolveFunctor_, (context, leftInput, rightInput, lower, unitsOnDiag, output), FLOAT_NATIVE); } template @@ -229,6 +262,76 @@ namespace sd { BUILD_SINGLE_SELECTOR(input->dataType(), adjointTriangularMatrix_, (context, input, lower, output), FLOAT_NATIVE); } - } - } +/* + ////////////////////////////////////////////////////////////////////////// + template + void triangularSolve2D(sd::LaunchContext* context, NDArray const& A, NDArray const& b, bool const lower, bool const unitsOnDiag, NDArray& x) { + + if(A.rankOf() != 2) + throw std::runtime_error("triangularSolve2D: input matrix A must be 2D !"); + + int temp; + + const bool isBvector = b.isCommonVector(temp); + const bool isXvector = x.isCommonVector(temp); + + if(A.sizeAt(0) != (isBvector ? b.lengthOf() : b.sizeAt(0))) + throw std::runtime_error("triangularSolve2D: A and b must have the same number of rows !"); + + if(A.sizeAt(1) != (isXvector ? x.lengthOf() : x.sizeAt(0))) + throw std::runtime_error("triangularSolve2D: columns number of array A must be equal to rows number of array x !"); + + if(isBvector) { + + if(lower) { + + for (int i = 0; i < A.sizeAt(0); ++i) { + T sum = b.t(i); + for (int j = 0; j < i; ++j) + sum -= A.t(i,j) * x.t(j); + x.r(i) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + else { + + for (int i = A.sizeAt(0) - 1; i >= 0; --i) { + T sum = b.t(i); + for (int j = i + 1; j < A.sizeAt(1); ++j) + sum -= A.t(i,j) * x.t(j); + x.r(i) = unitsOnDiag ? 
sum : sum / A.t(i,i); + } + } + } + else { + + if(lower) { + + for (int bCol = 0; bCol < b.sizeAt(1); ++bCol) { + for (int i = 0; i < A.sizeAt(0); ++i) { + T sum = b.t(i, bCol); + for (int j = 0; j < i; ++j) + sum -= A.t(i,j) * x.t(j, bCol); + x.r(i, bCol) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + else { + + for (int bCol = 0; bCol < b.sizeAt(1); ++bCol) { + for (int i = A.sizeAt(0) - 1; i >= 0; --i) { + T sum = b.t(i, bCol); + for (int j = i + 1; j < A.sizeAt(1); ++j) + sum -= A.t(i,j) * x.t(j, bCol); + x.r(i, bCol) = unitsOnDiag ? sum : sum / A.t(i,i); + } + } + } + } + } + BUILD_SINGLE_TEMPLATE(template void triangularSolve2D, (sd::LaunchContext* context, NDArray const& leftInput, NDArray const& rightInput, bool const lower, bool const unitsOnDiag, NDArray& output), FLOAT_TYPES); +*/ + + +} +} } diff --git a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp index bbcb1eca3..4baa36d65 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp @@ -50,6 +50,7 @@ namespace sd { // make sure host buffer is updated values.syncToHost(); indices.syncToHost(); + output.syncToHost(); auto rank = output.rankOf(); diff --git a/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp b/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp new file mode 100644 index 000000000..b8cc6d8ac --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/impl/sqrtm.cpp @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * ThnIn program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which nIn available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * dnIntributed under the License nIn dnIntributed on an "AS nIn" BASnIn, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permnInsions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + + +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +template +static void sqrtm_(const NDArray* x, NDArray* z) { + + + if(x->rankOf() == 2) { + + ops::helpers::Sqrtm::calc(*x, *z); + } + else { + + auto listX = x->allTensorsAlongDimension({-2, -1}); + auto listZ = z->allTensorsAlongDimension({-2, -1}); + + auto func = PRAGMA_THREADS_FOR { + + for (auto i = start; i < stop; i++) + ops::helpers::Sqrtm::calc(*listX.at(i), *listZ.at(i)); + }; + + samediff::Threads::parallel_tad(func, 0, listX.size()); + } +} + + +////////////////////////////////////////////////////////////////////////// +void sqrtm(sd::LaunchContext* context, const NDArray* x, NDArray* z) { + + x->syncToHost(); + BUILD_SINGLE_SELECTOR(z->dataType(), sqrtm_, (x, z), FLOAT_TYPES); + z->syncToDevice(); +} + + + +} +} +} diff --git a/libnd4j/include/ops/declarable/helpers/sqrtm.h b/libnd4j/include/ops/declarable/helpers/sqrtm.h new file mode 100644 index 000000000..2a123d420 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/sqrtm.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com) +// + +#ifndef LIBND4J_SQRTM_HELPER_H +#define LIBND4J_SQRTM_HELPER_H + +#include +#include "array/NDArray.h" + +namespace sd { +namespace ops { +namespace helpers { + +////////////////////////////////////////////////////////////////////////// +void sqrtm(sd::LaunchContext* context, const NDArray* x, NDArray* z); + + +} +} +} + +#endif //LIBND4J_SQRTM_HELPER_H diff --git a/libnd4j/include/ops/declarable/helpers/triangular_solve.h b/libnd4j/include/ops/declarable/helpers/triangular_solve.h index 73965f8c5..94e0198af 100644 --- a/libnd4j/include/ops/declarable/helpers/triangular_solve.h +++ b/libnd4j/include/ops/declarable/helpers/triangular_solve.h @@ -26,7 +26,9 @@ namespace sd { namespace ops { namespace helpers { - int triangularSolveFunctor(sd::LaunchContext* context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output); + int triangularSolveFunctor(sd::LaunchContext* context, NDArray* leftInput, NDArray* rightInput, bool lower, bool unitsOnDiag, NDArray* output); + template + void triangularSolve2D(sd::LaunchContext* context, const NDArray& leftInput, const NDArray& rightInput, const bool lower, const bool unitsOnDiag, NDArray& output); void adjointMatrix(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output); } } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 4139e9785..e4391c688 100644 --- 
a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -47,7 +47,7 @@ TEST_F(DeclarableOpsTests11, test_listdiff_1) { auto result = op.evaluate({&x, &y}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); - + } /////////////////////////////////////////////////////////////////// @@ -392,10 +392,10 @@ TEST_F(DeclarableOpsTests11, log_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::log_loss_grad op; @@ -431,9 +431,9 @@ TEST_F(DeclarableOpsTests11, log_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::log_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {1e-7}, {3}); @@ -1608,7 +1608,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_2) { // z->printIndexedBuffer("Solve 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_3) { @@ -1645,7 +1645,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_3) { // z->printIndexedBuffer("Solve 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1678,7 +1678,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_1) { @@ -1707,7 +1707,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_1) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } 
//////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_2) { @@ -1740,7 +1740,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_2) { // exp.printBuffer("4_2 Triangular_Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1774,7 +1774,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_3) { // exp.printBuffer("4_3 Triangular_Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1808,7 +1808,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_4) { // exp.printBuffer("4_4 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1842,7 +1842,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_5) { // exp.printBuffer("4_5 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1876,7 +1876,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_6) { // exp.printBuffer("4_6 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, Solve_Test_4_7) { @@ -1913,7 +1913,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_4_7) { // exp.printBuffer("4_7 Expec 3x3"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// @@ -1947,7 +1947,7 @@ TEST_F(DeclarableOpsTests11, Solve_Test_5) { // exp.printBuffer("4 Expec 4x4"); ASSERT_TRUE(exp.equalsTo(z)); - + } //////////////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests11, SolveLS_Test_1) { @@ -2399,10 +2399,10 @@ TEST_F(DeclarableOpsTests11, mean_sqerr_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 
0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::mean_sqerr_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2436,9 +2436,9 @@ TEST_F(DeclarableOpsTests11, mean_sqerr_loss_grad_test13) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::mean_sqerr_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2467,7 +2467,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test1) { ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test2) { @@ -2478,7 +2478,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test2) { auto result = op.evaluate({&x, &y}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test3) { @@ -2490,7 +2490,7 @@ TEST_F(DeclarableOpsTests11, SquaredSubtractTest_Test3) { auto result = op.evaluate({&x, &y, &eps}, {}, {}); ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(exp.equalsTo(result.at(0))); - + } /////////////////////////////////////////////////////////////////// @@ -2830,10 +2830,10 @@ TEST_F(DeclarableOpsTests11, absolute_difference_loss_grad_test12) { predictions.linspace(0.04, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::absolute_difference_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -2867,9 +2867,9 @@ TEST_F(DeclarableOpsTests11, absolute_difference_loss_grad_test13) { predictions.linspace(0.04, 0.04); 
labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::absolute_difference_loss_grad op; auto results = op.evaluate({&predictions, &weights, &labels}, {}, {3}); @@ -3305,10 +3305,10 @@ TEST_F(DeclarableOpsTests11, sigm_cross_entropy_loss_grad_test12) { logits.linspace(-0.08, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; - weights.t(3) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; + weights.r(3) = 0.; sd::ops::sigm_cross_entropy_loss_grad op; @@ -3344,9 +3344,9 @@ TEST_F(DeclarableOpsTests11, sigm_cross_entropy_loss_grad_test13) { logits.linspace(-0.08, 0.04); labels.linspace(1); weights.assign(0.5); - weights.t(0) = 0.; - weights.t(1) = 0.; - weights.t(2) = 0.; + weights.r(0) = 0.; + weights.r(1) = 0.; + weights.r(2) = 0.; sd::ops::sigm_cross_entropy_loss_grad op; auto results = op.evaluate({&logits, &weights, &labels}, {0.3}, {3}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index c37f3fe4a..c7222e6f7 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -2065,500 +2065,6 @@ TEST_F(DeclarableOpsTests13, lstmLayer_12) { #endif } -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 0; // [sL,bS,nIn] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = 
false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int 
dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // return whole h {h_0, h_1, ... , h_sL-1}, [sL,bS,nOut] - const auto retLastH = false; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool 
isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector(), {0., 1.}, GradCheck::LossFunc::MEAN); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) { - - const int sL = 4; - const int bS = 3; - const int nIn = 3; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 0; // forward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = 
{cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 3; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 1; // backward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = false; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, 
sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 1; // backward - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at 
last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 2; // [bS, nIn, sL] - const int directionMode = 2; // bidirectional sum - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // 
tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, 
&dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 1; // [bS,sL,nIn] - const int directionMode = 3; // bidirectional concat - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); 
- NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - 
-/////////////////////////////////////////////////////////////////// -TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { - - const int sL = 3; - const int bS = 2; - const int nIn = 2; - const int nOut = 2; - - const int dataFormat = 3; // [sL, bS, nIn] - const int directionMode = 4; // bidirectional extra output dim - const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates - const int cellAct = 0; // tanh activation for cell state - const int outAct = 0; // tanh activation for output - - const bool hasBiases = true; // biases array is provided - const bool hasSeqLen = true; // seqLen array is not provided - const auto hasInitH = true; // initial output is provided - const auto hasInitC = true; // initial cell state is provided - const auto hasPH = true; // peephole connections are absent - const auto retFullSeq = true; // dLdh per each time step - const auto retLastH = true; // output at last time step - const auto retLastC = true; // cells state at last time step - - const double cellClip = 0.5; // clipping - - NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); - NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); - NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); - NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); - NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); - NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); - - x.linspace(-2,0.1); - hI.linspace(-1.5,0.1); - cI.linspace(0.7,-0.1); - Wx.linspace(1,-0.1); - Wr.linspace(-1,0.1); - Wp.linspace(0.2,0.2); - b.linspace(1,-0.15); - - std::vector tArgs = {cellClip}; - std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; - std::vector bArgs = 
{hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; - - const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); - const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); - // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); - - sd::ops::lstmLayer opFF; - sd::ops::lstmLayer_bp opBP; - - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); - - ASSERT_TRUE(isGradCorrect); -} - //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, batchnorm_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp index 3d86cd92b..e01900e87 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp @@ -1923,7 +1923,6 @@ TEST_F(DeclarableOpsTests15, TestTensorMmul_BP17) { ASSERT_TRUE(isGradCorrect); } - ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests15, gru_1) { @@ -1960,31 +1959,67 @@ TEST_F(DeclarableOpsTests15, gru_1) { } ////////////////////////////////////////////////////////////////////// 
-TEST_F(DeclarableOpsTests15, gru_bp_1) { +TEST_F(DeclarableOpsTests15, sqrtm_1) { - const int sL = 3; - const int bS = 2; - const int nIn = 5; - const int nOut = 4; + NDArray x1('c', {1,1}, {4.}, sd::DataType::DOUBLE); + NDArray x2('c', {2,2}, {1.3,2,0.3,.5}, sd::DataType::DOUBLE); + NDArray x3('c', {3,3}, {0.5 ,-0.4 ,1.2 ,-2.8 ,-0.2 ,-2.1 ,-2.4 ,-2.0 ,1.1}, sd::DataType::DOUBLE); + NDArray x4('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray x5('c', {5,5}, {2.4 ,0.3 ,0.0 ,1.1 ,1.8 ,0.1 ,1.7 ,2.7 ,1.5 ,2.6 ,0.6 ,2.1 ,2.2 ,1.0 ,0.2 ,1.2 ,2.8 ,1.9 ,0.8 ,2.0 ,0.5 ,1.6 ,0.9 ,1.4 ,2.5}, sd::DataType::DOUBLE); + NDArray exp1('c', {1,1}, {2.}, sd::DataType::DOUBLE); + NDArray exp2('c', {2,2}, {1.0163674, 1.3341597,0.200124, 0.4827035}, sd::DataType::DOUBLE); + NDArray exp3('c', {3,3}, {6.5692188, 2.6273616,-0.1387864,-16.8404762,-7.0296495, 0.9204148,-11.4664296,-5.834273 , 2.2087478}, sd::DataType::DOUBLE); + NDArray exp4('c', {4,4}, {1.161387 ,-1.9343154, 0.230372 , 0.8660897,0.80588 , 3.4045446,-1.0152824,-2.0369467,2.2589629, 1.9674252, 1.5109997,-1.4283141,0.0226356, 1.3032279,-1.00396 , 1.8278487}, sd::DataType::DOUBLE); + NDArray exp5('c', {5,5}, {1.4175046,-0.4425298, 0.1846149, 0.3166522, 0.9140631,-0.1929139, 0.2889113, 1.4045273, 0.2600026, 1.552021 , 0.1372758, 0.5703854, 1.3336126, 0.3869317,-0.082492 , + 0.8607272, 3.1792474,-0.9499947, 0.8541668,-1.4243879, 0.0081136,-0.0622248, 0.4534325, 0.4641865, 1.8132138}, sd::DataType::DOUBLE); - NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. 
, 14.5, 15.}, sd::DataType::DOUBLE); - NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); - NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); - NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); - NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); + sd::ops::sqrtm op; - NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + auto results = op.evaluate({&x1}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp1.isSameShape(results.at(0))); + ASSERT_TRUE(exp1.equalsTo(results.at(0))); - Wx.linspace(1,-0.1); - Wh.linspace(0.2,0.2); - b.linspace(1,-0.15); + results = op.evaluate({&x2}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp2.isSameShape(results.at(0))); + ASSERT_TRUE(exp2.equalsTo(results.at(0))); - const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); - const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); + results = op.evaluate({&x3}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp3.isSameShape(results.at(0))); + ASSERT_TRUE(exp3.equalsTo(results.at(0))); - sd::ops::gru opFF; - sd::ops::gru_bp opBP; + results = op.evaluate({&x4}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp4.isSameShape(results.at(0))); + ASSERT_TRUE(exp4.equalsTo(results.at(0))); - const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); + results = op.evaluate({&x5}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(exp5.isSameShape(results.at(0))); + ASSERT_TRUE(exp5.equalsTo(results.at(0))); +} + +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests15, sqrtm_2) { + + NDArray x('c', {10,10}, {-0.3 ,2.7 ,4.9 ,7.0 ,7.3 ,-1.3 ,0.5 ,9.9 ,-9.4 ,8.4 ,2.2 ,5.2 ,7.6 ,1.2 ,2.0 ,-3.8 ,2.1 ,6.1 ,1.6 ,6.9 ,5.1 ,5.3 ,6.4 ,8.7 ,0.1 ,8.5 , + 3.3 ,1.0 ,6.8 ,0.4 ,0.7 ,3.2 ,7.4 ,6.7 ,1.1 ,7.2 ,6.0 ,7.5 ,9.7 ,5.4 ,9.0 ,6.3 ,0.0 ,4.5 ,8.3 ,7.9 ,3.0 
,6.5 ,0.6 ,8.0 ,9.5 ,3.6 ,1.9 ,6.2 ,0.9 ,4.0 ,4.1 , + 8.1 ,3.9 ,4.3 ,4.7 ,3.7 ,3.4 ,5.8 ,10.0 ,8.6 ,9.3 ,9.1 ,4.6 ,1.4 ,7.8 ,1.5 ,7.7 ,4.2 ,9.6 ,8.2 ,-7.1 ,5.7 ,5.5 ,2.6 ,8.8 ,2.9 ,0.2 ,5.6 ,-2.5 ,8.9 ,2.8 ,0.8 ,1.5 ,3.1 ,3.5 ,4.4 ,2.4 ,9.2 ,-4.8 ,1.7 ,6.6 ,9.8 ,1.8 ,5.9}, sd::DataType::DOUBLE); + + NDArray expZ('c', {10,10}, {1.2779038, 0.0333321, 0.8215617, 0.5736392, 1.3973911, -1.1757741,0.1990005, 1.5893778, -3.0159568, 2.5829108,0.5692253, 2.219431 , 1.022612 , -0.3131795, -0.1957848, -1.7805065, + 0.6668489, 1.1968921, 0.9781974, 1.2007764,0.7028634, 0.7496937, 2.2511438, 2.1945378, 0.2559353, 2.8948612,-0.4306994, -0.9922216, 0.3884369, -1.4174481, + -1.6060233, 0.1571057, 1.432471 , 0.4508346, 0.0618069, -2.4511742,2.0641709, 2.4751085, 1.84787 , 3.4146313,0.7774219, 0.768369 , -0.1417226, -0.3970577, 2.9512879, 0.5474537, + 0.4991412, 0.7604095, 0.4523091, 1.7813704,2.5998339, 0.9402402, -0.82775 , 2.3637147, -0.6394584, 4.6181937,-0.1762181, -0.2820475, 0.9280713, -2.1876918, + 0.1576249, 0.336376 , 0.2017592, 0.851786 , 1.3542577, 1.2752901,2.9718476, 1.1102557, 0.0067319, -0.2652283,0.8839235, -0.2637131, 1.5687876, 0.5156139, 1.9015886, 0.9087172, + -1.5607482, 2.4216275, 1.0399745, -0.4930439,1.3044354, 0.1690006, 0.2106909, -0.2683631, -0.4193939, 1.0233265,0.4571777, -0.2024148, 2.3564855, 1.0442339, + 1.1073322, 1.0728525, -0.5917566, 2.2267418, -1.6096582, 2.0685315,0.6800798, 0.4451858, -0.4048465, 1.2347676}, sd::DataType::DOUBLE); + sd::ops::sqrtm op; + + auto results = op.evaluate({&x}, {}, {}); + ASSERT_EQ(ND4J_STATUS_OK, results.status()); + ASSERT_TRUE(expZ.isSameShape(results.at(0))); + ASSERT_TRUE(expZ.equalsTo(results.at(0))); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp index f111a888a..5f1aefe36 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp @@ -241,6 +241,7 @@ 
TEST_F(DeclarableOpsTests19, test_threshold_encode_decode) { ASSERT_EQ(exp, initial); } +#ifdef _RELEASE TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) { // [2,1,135079944,1,1,8192,1,99] auto initial = NDArrayFactory::create('c', {1, 135079944}); @@ -287,6 +288,7 @@ TEST_F(DeclarableOpsTests19, test_threshold_encode_decode_2) { ASSERT_EQ(exp, initial); } +#endif diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp index e25bd0144..fae8c4918 100644 --- a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp @@ -45,61 +45,41 @@ public: }; -#ifndef __CUDABLAS__ - -TEST_F(HelpersTests1, test_binary_search_1) { - std::array array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - - auto idx = sd::ops::helpers::binarySearch(array.data(), 2, 10); - ASSERT_EQ(2, idx); -} - -TEST_F(HelpersTests1, test_binary_search_2) { - std::array array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - - auto idx = sd::ops::helpers::binarySearch(array.data(), 18, 10); - ASSERT_EQ(-1, idx); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, evalHHmatrix_test1) { +// /////////////////////////////////////////////////////////////////// +// TEST_F(HelpersTests1, evalHHmatrix_test1) { - auto x = NDArrayFactory::create('c', {1,4}, {14,17,3,1}); - auto exp = NDArrayFactory::create('c', {4,4}, {-0.629253, -0.764093, -0.13484, -0.0449467, -0.764093, 0.641653, -0.0632377, -0.0210792, -0.13484,-0.0632377, 0.98884,-0.00371987, -0.0449467,-0.0210792,-0.00371987, 0.99876}); +// auto x = NDArrayFactory::create('c', {4}, {14,17,3,1}); +// auto exp = NDArrayFactory::create('c', {4,4}, {-0.629253, -0.764093, -0.13484, -0.0449467, -0.764093, 0.641653, -0.0632377, -0.0210792, -0.13484,-0.0632377, 0.98884,-0.00371987, -0.0449467,-0.0210792,-0.00371987, 0.99876}); - auto result = ops::helpers::Householder::evalHHmatrix(x); - ASSERT_TRUE(result.isSameShape(&exp)); - 
ASSERT_TRUE(result.equalsTo(&exp)); +// auto result = ops::helpers::Householder::evalHHmatrix(x); +// ASSERT_TRUE(result.isSameShape(&exp)); +// ASSERT_TRUE(result.equalsTo(&exp)); -} +// } -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, evalHHmatrix_test2) { +// /////////////////////////////////////////////////////////////////// +// TEST_F(HelpersTests1, evalHHmatrix_test2) { - #ifdef __CUDABLAS__ - return; - #endif - auto x = NDArrayFactory::create('c', {1,3}, {14,-4,3}); - auto exp = NDArrayFactory::create('c', {3,3}, {-0.941742, 0.269069,-0.201802, 0.269069, 0.962715,0.0279639, -0.201802,0.0279639, 0.979027}); +// #ifdef __CUDABLAS__ +// return; +// #endif +// auto x = NDArrayFactory::create('c', {3}, {14,-4,3}); +// auto exp = NDArrayFactory::create('c', {3,3}, {-0.941742, 0.269069,-0.201802, 0.269069, 0.962715,0.0279639, -0.201802,0.0279639, 0.979027}); - auto result = ops::helpers::Householder::evalHHmatrix(x); +// auto result = ops::helpers::Householder::evalHHmatrix(x); - ASSERT_TRUE(result.isSameShape(&exp)); - ASSERT_TRUE(result.equalsTo(&exp)); - -} +// ASSERT_TRUE(result.isSameShape(&exp)); +// ASSERT_TRUE(result.equalsTo(&exp)); +// } ///////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, evalHHmatrixData_test1) { - #ifdef __CUDABLAS__ - return; - #endif - auto x = NDArrayFactory::create('c', {1,4}, {14,17,3,1}); - auto tail = NDArrayFactory::create('c', {1,3}); - auto expTail = NDArrayFactory::create('c', {1,3}, {0.468984, 0.0827618, 0.0275873}); + auto x = NDArrayFactory::create('c', {4}, {14,17,3,1}); + auto tail = NDArrayFactory::create('c', {3}); + auto expTail = NDArrayFactory::create('c', {3}, {0.468984, 0.0827618, 0.0275873}); const double normXExpected = -22.2486; const double coeffExpected = 1.62925; @@ -110,34 +90,24 @@ TEST_F(HelpersTests1, evalHHmatrixData_test1) { ASSERT_NEAR(coeff, coeffExpected, 1e-5); ASSERT_TRUE(tail.isSameShapeStrict(expTail)); 
ASSERT_TRUE(tail.equalsTo(&expTail)); - } - ///////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, Householder_mulLeft_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto x = NDArrayFactory::create('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16}); auto tail = NDArrayFactory::create('c', {1,3}, {0.5,0.5,0.5}); auto exp = NDArrayFactory::create('c', {4,4}, {9.05,15.8,11.4, 0.8, 8.525, 2.4,15.7,17.9, 17.525,16.4, 3.7, 1.9, 4.525, 2.4, 0.7,14.9}); ops::helpers::Householder::mulLeft(x, tail, 0.1); - // expTail.printShapeInfo(); ASSERT_TRUE(x.isSameShapeStrict(exp)); ASSERT_TRUE(x.equalsTo(&exp)); - } ///////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, Householder_mulLeft_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto x = NDArrayFactory::create('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16}); auto tail = NDArrayFactory::create('c', {3,1}, {0.5,0.5,0.5}); auto exp = NDArrayFactory::create('c', {4,4}, {9.05,15.8,11.4, 0.8, 8.525, 2.4,15.7,17.9, 17.525,16.4, 3.7, 1.9, 4.525, 2.4, 0.7,14.9}); @@ -152,9 +122,6 @@ TEST_F(HelpersTests1, Householder_mulLeft_test2) { ///////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, Householder_mulRight_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto x = NDArrayFactory::create('c', {4,4}, {12 ,19 ,14 ,3 ,10 ,4 ,17 ,19 ,19 ,18 ,5 ,3 ,6 ,4 ,2 ,16}); auto tail = NDArrayFactory::create('c', {1,3}, {0.5,0.5,0.5}); auto exp = NDArrayFactory::create('c', {4,4}, {9,17.5,12.5, 1.5, 7, 2.5,15.5, 17.5, 15.8,16.4, 3.4, 1.4, 4.3,3.15,1.15,15.15}); @@ -163,16 +130,11 @@ TEST_F(HelpersTests1, Householder_mulRight_test1) { ASSERT_TRUE(x.isSameShapeStrict(exp)); ASSERT_TRUE(x.equalsTo(&exp)); - } - ///////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, BiDiagonalizeUp_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', 
{4,4}, {9,13,3,6,13,11,7,6,3,7,4,7,6,6,7,10}); auto hhMatrixExp = NDArrayFactory::create('c', {4,4}, {1.524000, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367,0, 0.229221,-0.272237,0.938237,0}); auto hhBidiagExp = NDArrayFactory::create('c', {4,4}, {-17.1756, 24.3869, 0, 0, 0,-8.61985,-3.89823, 0, 0, 0, 4.03047,4.13018, 0, 0, 0,1.21666}); @@ -189,15 +151,11 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, BiDiagonalizeUp_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto hhMatrixExp = NDArrayFactory::create('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821, 0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568}); auto hhBidiagExp = NDArrayFactory::create('c', {4,4}, {-17.2916,7.03123, 0, 0, 0, 16.145,-22.9275, 0, 0, 0, -9.9264,-11.5516, 0, 0, 0,-12.8554}); ops::helpers::BiDiagonalUp object(matrix); - // object._HHmatrix.printBuffer(); ASSERT_TRUE(hhMatrixExp.isSameShapeStrict(object._HHmatrix)); ASSERT_TRUE(hhMatrixExp.equalsTo(&object._HHmatrix)); @@ -208,9 +166,6 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, BiDiagonalizeUp_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12, 0,-15,10,2}); auto hhMatrixExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); auto hhBidiagExp = NDArrayFactory::create('c', {4,4}, {-17.2916,7.03123, 0, 0, 
0,16.3413,-20.7828, 0, 0, 0,-18.4892,4.13261, 0, 0, 0,-21.323}); @@ -227,9 +182,6 @@ TEST_F(HelpersTests1, BiDiagonalizeUp_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto vectorsUseqExp = NDArrayFactory::create('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821,0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568}); auto vectorsVseqExp = NDArrayFactory::create('c', {5,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.66025, 1.66979,-0.444696, 0.114105,0.130601, 1.58392, 0, -0.22821,0.215638,0.0524781, 1.99303, 0.0760699,0.375605, 0.509835,0.0591568}); @@ -254,9 +206,6 @@ TEST_F(HelpersTests1, HHsequence_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12 ,0,-15,10,2}); auto vectorsUseqExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); auto vectorsVseqExp = NDArrayFactory::create('c', {6,4}, {1.52048, 1.37012, 0.636326, -0.23412, 0.494454, 1.65232, 1.59666,-0.502606, 0.114105, 0.129651, 1.35075, 0, -0.22821, 0.214071, 0.103749, 1.61136, 0.0760699, 0.372875, 0.389936, 0.2398, 0,0.0935171,-0.563777, 0.428587}); @@ -281,9 +230,6 @@ TEST_F(HelpersTests1, HHsequence_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = 
NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto vectorsUseqExp = NDArrayFactory::create('c', {4,4}, {1.524, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367, 0, 0.229221,-0.272237,0.938237, 0}); auto vectorsVseqExp = NDArrayFactory::create('c', {4,4}, {1.524, 1.75682,0.233741,0.289458, 0.496646, 1.5655, 1.02929,0.971124, 0.114611,-0.451039, 1.06367, 0, 0.229221,-0.272237,0.938237, 0}); @@ -308,9 +254,6 @@ TEST_F(HelpersTests1, HHsequence_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test4) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto exp = NDArrayFactory::create('c', {4,4}, {2.49369, 2.62176, 5.88386, 7.69905, -16.0588,-18.7319,-9.15007,-12.6164, 4.7247, 3.46252, 1.02038, -1.4533, 2.9279,-2.29178, 1.90139,-0.66187}); @@ -325,9 +268,6 @@ TEST_F(HelpersTests1, HHsequence_test4) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test5) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto exp = NDArrayFactory::create('c', {5,4}, {4.52891, 8.09473,-2.73704,-13.0302, -11.0752, 7.41549,-3.75125,0.815252, -7.76818,-15.9102,-9.90869,-11.8677, 1.63942,-17.0312,-9.05102,-4.49088, -9.63311,0.540226,-1.52764, 5.79111}); @@ -342,9 +282,6 @@ TEST_F(HelpersTests1, HHsequence_test5) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test6) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, 
{9,-1,3,9, -4.43019,-15.1713, -3.2854,-7.65743, -9.39162,-7.03599, 8.03827, 9.48453, -2.97785, -16.424, 5.35265,-20.1171, -0.0436177, -13.118,-8.37287,-17.3012, -1.14074, 4.18282,-10.0914,-5.69014}); @@ -360,9 +297,6 @@ TEST_F(HelpersTests1, HHsequence_test6) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test7) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 13,11,7,6, 3,7,4,7, 6,6,7,10}); auto exp = NDArrayFactory::create('c', {4,4}, {9,13,3,6,-5.90424,-2.30926,-0.447417, 3.05712, -10.504,-9.31339, -8.85493,-10.8886, -8.29494,-10.6737, -5.94895,-7.55591}); @@ -376,9 +310,6 @@ TEST_F(HelpersTests1, HHsequence_test7) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test8) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto exp = NDArrayFactory::create('c', {5,4}, {9, -13, 3, 6, 13, 11, 7, -6, -6.90831,-5.01113, 0.381677,0.440128, -0.80107,0.961605,-0.308019,-1.96153, -0.795985, 18.6538, 12.0731, 16.9988}); @@ -392,9 +323,6 @@ TEST_F(HelpersTests1, HHsequence_test8) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test9) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12 ,0,-15,10,2}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -13, 3, 6, 13, 11, 7, -6, 3, 7, 4, 7, 3.77597, 18.6226,-0.674868, 4.61365, 5.02738,-14.1486, -2.22877,-8.98245, -0.683766, 1.73722, 14.9859, 12.0843}); @@ -408,9 +336,6 @@ TEST_F(HelpersTests1, HHsequence_test9) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test10) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {4,4}, {9,13,3,6, 
13,11,7,6, 3,7,4,7, 6,6,7,10}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, 2.58863, 11.0295,-4.17483,-0.641012, -1.21892,-16.3151, 6.12049, -20.0239, -0.901799,-15.0389,-12.4944, -20.2394}); @@ -425,9 +350,6 @@ TEST_F(HelpersTests1, HHsequence_test10) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test11) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,4}, {9,-13,3,6, 13,11,7,-6, 3,7,4,7, -6,6,7,10, 2,17,9,12}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, 1.14934, 4.40257, 8.70127,-1.18824, 1.5132,0.220419,-11.6285,-11.7549, 2.32148, 24.3838,0.256531, 25.9116}); @@ -442,9 +364,6 @@ TEST_F(HelpersTests1, HHsequence_test11) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test12) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, -1, 6, 7, 19, -2.62252,-22.2914, 4.76743,-19.6689, -1.05943,-9.00514,-11.8013,-7.94571}); @@ -459,9 +378,6 @@ TEST_F(HelpersTests1, HHsequence_test12) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test13) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 
2,17,9,15, 2,17,-9,15}); auto exp = NDArrayFactory::create('c', {6,4}, {9 , -1 , 3 , 9, -4.65167, 3.44652, 7.83593, 22.6899, -9.48514, -21.902, 5.66559,-13.0533, -0.343184, 15.2895, 7.2888, 14.0489, 0.289638,-1.87752, 3.944,-1.49707, -2.48845, 3.18285,-10.6685,0.406502}); @@ -476,9 +392,6 @@ TEST_F(HelpersTests1, HHsequence_test13) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test14) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto exp = NDArrayFactory::create('c', {5,5}, {1.78958, 8.06962,-6.13687, 4.36267, 1.06472, -14.9578, -8.1522, 1.30442,-18.3343,-13.2578, 13.5536, 5.50764, 15.7859, 7.60831, 11.7871, -1.3626,-0.634986, 7.60934, -2.1841, 5.62694, -13.0577, 15.1554, -7.6511, 3.76365,-5.87368}); @@ -494,9 +407,6 @@ TEST_F(HelpersTests1, HHsequence_test14) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test15) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,3}, {9,-13,3, 13,11,7, 3,7,4, -6,6,7, 2,17,9}); auto matrix2 = NDArrayFactory::create('c',{5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto exp = NDArrayFactory::create('c', {5,5}, {9, -1, 3, 9, 10, 11, -7, -5, 3, 2, 4, 7, -1, 6, 7, -9.26566,-16.4298, 1.64125,-17.3243,-7.70257, -16.7077, 4.80216,-19.1652,-2.42279,-13.0258}); @@ -511,9 +421,6 @@ TEST_F(HelpersTests1, HHsequence_test15) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test16) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 
100.; @@ -529,9 +436,6 @@ TEST_F(HelpersTests1, HHsequence_test16) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test17) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {9,-1,3,9,10, 11,-7,-5,3, 2, 4,7,-1,6,7, 19,2,17,9,15, 2,17,-9,15,2}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -547,9 +451,6 @@ TEST_F(HelpersTests1, HHsequence_test17) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test18) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -565,9 +466,6 @@ TEST_F(HelpersTests1, HHsequence_test18) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, HHsequence_test19) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c',{6,4}, {9,-1,3,9, 10,11,-7,-5, 3,2,4,7, -1,6,7,19, 2,17,9,15, 2,17,-9,15}); auto matrix2 = NDArrayFactory::create('c', {10,10}); matrix2 = 100.; @@ -581,305 +479,48 @@ TEST_F(HelpersTests1, HHsequence_test19) { } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test1) { +TEST_F(HelpersTests1, HHcolPivQR_1) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix = NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); - auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); - auto expU = NDArrayFactory::create('c', {5,5}, {18,3, 2,7,-11, 7, 
7.75131,10,-12.5665, -8, 13, 20.905,-4,-14.7979, -9, -17,-3.87565,-7,-19.2608, -8, -9, 9, 6, 14,-11}); + auto matrix1 = NDArrayFactory::create('c', {5,6}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - ops::helpers::SVD svd(matrix, 4, true, true, true, 't'); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(1,1,2,2); + auto expQR = NDArrayFactory::create('c', {5,6}, {-32.6649659, -4.9594419, -8.2657365, 7.2248659, 16.5927006, 11.7251002, -0.1354883, -29.0586293, 10.9775804, -14.6886248, 4.1884104, 20.7115773, 0.3483986, 0.3236753, 25.5376258, 1.6432380, 9.6395914, -9.0237996, -0.0580664, 0.0798999, -0.0799029, 19.5280665, -4.9773587, 16.0968604, 0.3483986, -0.6667832, 0.0252425, 0.0159188, 10.6978354, -4.6919842}); + auto expCoeffs = NDArrayFactory::create('c', {1,5}, {1.58166, 1.28555, 1.98605, 1.99949, 0}); + auto expPermut = NDArrayFactory::create('c', {6,6}, {0,1,0,0,0,0, 0,0,1,0,0,0, 1,0,0,0,0,0, 0,0,0,0,0,1, 0,0,0,0,1,0, 0,0,0,1,0,0}); - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); + ops::helpers::HHcolPivQR qr(matrix1); + + ASSERT_TRUE(expQR.equalsTo(&qr._qr)); + ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); + ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); + + ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); + ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); + ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test2) { +TEST_F(HelpersTests1, HHcolPivQR_2) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); - auto expM = NDArrayFactory::create('c', 
{5,5}, {22.6716,14, 9,-12,-12, 5,-4,-19, -7,-12, 0,16, 0, -6, 8, -10,14,-15, 6,-10, -14,12, -1,-16, 3}); - auto expU = NDArrayFactory::create('c', {5,5}, {-12.1738, 3, -13.4089, 7,-11, 1.36735, 7, -12.1297,-13, -8, -12.3944,20, -5.60173,-16, -9, -17,-5,-7,-19, -8, -9, 9, 6, 14,-11}); + auto matrix1 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - ops::helpers::SVD svd(matrix, 4, true, true, true); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(0,0,2,2); + auto expQR = NDArrayFactory::create('c', {6,6}, {38.1707, -3.03898, 5.16103, 23.0805, -7.57126, -13.885, -0.41519, 34.3623, 3.77403, 2.62327, -8.17784, 9.10312, 0.394431, 0.509952,-30.2179, -6.78341, 12.8421, 28.5491, -0.290633, 0.111912,0.450367, 28.1139, 15.5195, 2.60562, 0.332152, 0.405161,0.308163,0.0468127, 22.294,-2.94931, 0.249114,0.0627956,0.657873, 0.76767,-0.752594,-7.46986}); + auto expCoeffs = NDArrayFactory::create('c', {1,6}, {1.26198, 1.38824, 1.15567, 1.25667, 1.27682, 0}); + auto expPermut = NDArrayFactory::create('c', {6,6}, {0,0,1,0,0,0, 0,0,0,0,1,0, 0,0,0,1,0,0, 0,1,0,0,0,0, 0,0,0,0,0,1, 1,0,0,0,0,0}); - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); + ops::helpers::HHcolPivQR qr(matrix1); + + ASSERT_TRUE(expQR.equalsTo(&qr._qr)); + ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); + ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); + + ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); + ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); + ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); } /////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test3) { +TEST_F(HelpersTests1, HHcolPivQR_3) { - #ifdef __CUDABLAS__ - return; - #endif - auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 
,-1 ,-16 ,3}); - auto matrix2 = NDArrayFactory::create('c', {2,6}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20}); - auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); - auto expU = NDArrayFactory::create('c', {2,6}, {18, 2.58377, 2, 7.16409,-11, 7, 7 ,10.4525 ,-13, -7.39897 ,13 ,20}); - - ops::helpers::SVD svd(matrix, 4, false, true, true, 't'); - svd._m = matrix; - svd._u = matrix2; - svd.deflation1(1,1,2,2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test4) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8, 18,-17, 18, -14,-15,8.06226, 2, 2, -3,-18, 0,-17, 2, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16, -20, 13, 20,-10, -9, -1,-20.7138,4.46525, -4, 20, -11, 19,-18.4812,2.72876, 12,-19, 18,-18, 17, -10,-19, 14, -2, -7, -17, -14, -4,-16, 18, -6, -18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-18, -13, 14, 2, -2,-11,2.97683,-7.69015,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 2, 2, 1, 1, 2, 1); - - 
ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test5) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16,-20,13, 20,-10, -9,-15.8359, -7,-12.2566, -4, 20, -11,-1.30158, -5,-26.1401, 12,-19, 18,-19.3068, 17, 7.15871,-19, 14, -2, -7,-17, -14, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 0, 1, 1, 0, 2, 2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test6) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = 
NDArrayFactory::create('c', {2,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {2,6}, {-10, -0.542326,-20, 20.6084,20,-10, -9, -15.8359, -7,-12.2566,-4, 20}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, false, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation2(1, 0, 1, 1, 0, 2, 2); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test7) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8,19.6977,-17, 18, -14,-15, 1, 2, 2, -3,-18, 0,-17, 0, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10, -16,-20, 13, 20,-10, -9,-9.03658, -7,-17.8701, -4, 20, -11, 10.0519, -5,-24.1652, 12,-19, 18, -20.51, 
17,-1.82762,-19, 14, -2,-12.0826,-17,-9.95039, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13,14, 2, -2,-11, 8, 2,-6, -3, -8, 8,-2, 7, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation(1, 3, 1, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test8) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expM = NDArrayFactory::create('c', {6,5}, {12, 20,19,-18, -6, 3, 6, 2, -7, -7, 14,-15, 2,-17, 18, -14, 8, 1, 18, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); - auto expU = NDArrayFactory::create('c', {6,6}, {-10,-20,-16, 13, 20,-10, -9, -7, -1,-20, -4, 20, -11, -5, 19,-18, 12,-19, 18, 17,-18,-10,-19, 14, -2, -7,-17,-14, -4,-16, 18, -6,-18, 1,-15,-12}); - auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13, 2,14, -2,-11, 8,-6, 2, -3, -8, 8, 7,-2, 16, 15, -3, 7, 0}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - svd.deflation(0, 2, 2, 1, 2, 1); - - ASSERT_TRUE(expM.equalsTo(&svd._m)); - ASSERT_TRUE(expU.equalsTo(&svd._u)); - ASSERT_TRUE(expV.equalsTo(&svd._v)); -} - 
-/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test9) { - - #ifdef __CUDABLAS__ - return; - #endif - auto col0 = NDArrayFactory::create('c', {10,1}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,14}); - auto diag = NDArrayFactory::create('c', {10,1}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2}); - auto permut = NDArrayFactory::create('c', {1,10}, {8 ,1 ,4 ,0, 5 ,2 ,9 ,3 ,7 ,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expSingVals = NDArrayFactory::create('c', {10,1}, {-2, 15.304323, 11.2, -1, 1.73489, -12, -15.3043, -12.862, 5.6, 41.4039}); - auto expShifts = NDArrayFactory::create('c', {10,1}, {1, 19, 19, 1, 2, -18, -18, -13, 2, 2}); - auto expMus = NDArrayFactory::create('c', {10,1}, {-3, -3.695677, -7.8, -2, -0.265108, 6, 2.69568, 0.138048, 3.6, 39.4039}); - - auto singVals = NDArrayFactory::create('c', {10,1}); - auto shifts = NDArrayFactory::create('c', {10,1}); - auto mus = NDArrayFactory::create('c', {10,1}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.calcSingVals(col0, diag, permut, singVals, shifts, mus); - - ASSERT_TRUE(expSingVals.equalsTo(&singVals)); - ASSERT_TRUE(expShifts.equalsTo(&shifts)); - ASSERT_TRUE(expMus.equalsTo(&mus)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test10) { - - #ifdef __CUDABLAS__ - return; - #endif - auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto col0 = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); - auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); - auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); - auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 
,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expZhat = NDArrayFactory::create('c', {4,1}, {0, 0.278208, 72.501953, 0}); - - auto zhat = NDArrayFactory::create('c', {4,1}); - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.perturb(col0, diag, permut, singVals, shifts, mus, zhat); - - ASSERT_NEAR(expZhat.e(1), zhat.e(1), EPS); - ASSERT_NEAR(expZhat.e(2), zhat.e(2), EPS); -} - - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test11) { - - #ifdef __CUDABLAS__ - return; - #endif - auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); - auto zhat = NDArrayFactory::create('c', {4,1}, {2 ,1 ,2 ,1}); - auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); - auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); - auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); - auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - - auto expU = NDArrayFactory::create('c', {5,5}, {-0.662161, 0.980399,-0.791469,-0.748434, 0, -0.744931, 0.183825,-0.593602,-0.392928, 0, 0.0472972, 0.061275,0.0719517, 0.104781, 0, 0.0662161,0.0356509, 0.126635, 0.523904, 0, 0, 0, 0, 0, 1}); - auto expV = NDArrayFactory::create('c', {4,4}, {-0.745259,-0.965209, -0.899497, -0.892319, -0.652102, 0.21114, -0.39353, -0.156156, -0.0768918,-0.130705,-0.0885868,-0.0773343, 0.115929,0.0818966, 0.167906, 0.416415}); - auto U = NDArrayFactory::create('c', {5,5}); - auto V = NDArrayFactory::create('c', {4,4}); - - - ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); - svd.calcSingVecs(zhat, diag,permut, singVals, shifts, mus, U, V); - - ASSERT_TRUE(expU.equalsTo(&U)); - ASSERT_TRUE(expV.equalsTo(&V)); - -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test12) { - - #ifdef __CUDABLAS__ - return; - #endif - 
auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); - auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); - auto matrix4 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); - - auto expSingVals = NDArrayFactory::create('c', {4,1}, {8.43282, 5, 2.3, 1.10167}); - auto expU = NDArrayFactory::create('c', {5,5}, {0.401972,0, 0.206791, 0.891995,0, 0,1, 0, 0,0, 0.816018,0,-0.522818,-0.246529,0, -0.415371,0,-0.826982, 0.378904,0, 0,0, 0, 0,1}); - auto expV = NDArrayFactory::create('c', {4,4}, {-0.951851,0,-0.133555,-0.275939, 0,1, 0, 0, 0.290301,0,-0.681937,-0.671333, -0.098513,0,-0.719114, 0.687873}); - - ops::helpers::SVD svd(matrix4, 4, true, true, true, 't'); - svd._m = matrix1; - svd._u = matrix2; - svd._v = matrix3; - NDArray U, singVals, V; - svd.calcBlockSVD(1, 4, U, singVals, V); - - ASSERT_TRUE(expSingVals.equalsTo(&singVals)); - ASSERT_TRUE(expU.equalsTo(&U)); - ASSERT_TRUE(expV.equalsTo(&V)); - - ASSERT_TRUE(expSingVals.isSameShapeStrict(singVals)); - ASSERT_TRUE(expU.isSameShapeStrict(U)); - ASSERT_TRUE(expV.isSameShapeStrict(V)); -} - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test13) { - - #ifdef __CUDABLAS__ - return; - #endif NDArray matrix1('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); auto expQR = NDArrayFactory::create('c', {6,5}, {-37.054 , 0.323852 , 8.04231 , -22.9395 ,-13.089, 0.105164, 32.6021, 6.42277, 
-0.262898,-1.58766, 0.140218, -0.485058, 29.2073, -9.92301,-23.7111, -0.262909,-0.00866538, 0.103467, 8.55831,-1.86455, -0.315491, 0.539207, 0.40754,-0.0374124,-7.10401, 0.315491, 0.385363,-0.216459, -0.340008,0.628595}); @@ -898,60 +539,10 @@ TEST_F(HelpersTests1, SVD_test13) { } -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test14) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {5,6}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); - - auto expQR = NDArrayFactory::create('c', {5,6}, {-32.665, -4.95944, -8.26574, 7.22487, 16.5927, 11.7251, -0.135488, -29.0586, 10.9776, -14.6886, 4.18841, 20.7116, 0.348399, 0.323675, 25.5376, 1.64324, 9.63959, -9.0238, -0.0580664,0.0798999,-0.0799029, 19.5281,-4.97736, 16.0969, 0.348399,-0.666783, 0.0252425,0.0159188, 10.6978,-4.69198}); - auto expCoeffs = NDArrayFactory::create('c', {1,5}, {1.58166, 1.28555, 1.98605, 1.99949, 0}); - auto expPermut = NDArrayFactory::create('c', {6,6}, {0,1,0,0,0,0, 0,0,1,0,0,0, 1,0,0,0,0,0, 0,0,0,0,0,1, 0,0,0,0,1,0, 0,0,0,1,0,0}); - - ops::helpers::HHcolPivQR qr(matrix1); - - ASSERT_TRUE(expQR.equalsTo(&qr._qr)); - ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); - ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); - - ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); - ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); - ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); -} - - -/////////////////////////////////////////////////////////////////// -TEST_F(HelpersTests1, SVD_test15) { - - #ifdef __CUDABLAS__ - return; - #endif - auto matrix1 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); - - auto expQR = NDArrayFactory::create('c', {6,6}, {38.1707, -3.03898, 5.16103, 23.0805, -7.57126, 
-13.885, -0.41519, 34.3623, 3.77403, 2.62327, -8.17784, 9.10312, 0.394431, 0.509952,-30.2179, -6.78341, 12.8421, 28.5491, -0.290633, 0.111912,0.450367, 28.1139, 15.5195, 2.60562, 0.332152, 0.405161,0.308163,0.0468127, 22.294,-2.94931, 0.249114,0.0627956,0.657873, 0.76767,-0.752594,-7.46986}); - auto expCoeffs = NDArrayFactory::create('c', {1,6}, {1.26198, 1.38824, 1.15567, 1.25667, 1.27682, 0}); - auto expPermut = NDArrayFactory::create('c', {6,6}, {0,0,1,0,0,0, 0,0,0,0,1,0, 0,0,0,1,0,0, 0,1,0,0,0,0, 0,0,0,0,0,1, 1,0,0,0,0,0}); - - ops::helpers::HHcolPivQR qr(matrix1); - - ASSERT_TRUE(expQR.equalsTo(&qr._qr)); - ASSERT_TRUE(expCoeffs.equalsTo(&qr._coeffs)); - ASSERT_TRUE(expPermut.equalsTo(&qr._permut)); - - ASSERT_TRUE(expQR.isSameShapeStrict(qr._qr)); - ASSERT_TRUE(expCoeffs.isSameShapeStrict(qr._coeffs)); - ASSERT_TRUE(expPermut.isSameShapeStrict(qr._permut)); -} - - +#ifndef __CUDABLAS__ /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test1) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto left = NDArrayFactory::create('c', {2,2}); auto right = NDArrayFactory::create('c', {2,2}); @@ -968,9 +559,6 @@ TEST_F(HelpersTests1, JacobiSVD_test1) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test2) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto matrix4 = NDArrayFactory::create('c', {5,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19}); auto matrix5 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); @@ -998,9 +586,6 @@ 
TEST_F(HelpersTests1, JacobiSVD_test2) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test3) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1014,9 +599,6 @@ TEST_F(HelpersTests1, JacobiSVD_test3) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test4) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1030,9 +612,6 @@ TEST_F(HelpersTests1, JacobiSVD_test4) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test5) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1046,9 +625,6 @@ TEST_F(HelpersTests1, JacobiSVD_test5) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test6) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1062,9 +638,6 @@ TEST_F(HelpersTests1, JacobiSVD_test6) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, 
JacobiSVD_test7) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1078,9 +651,6 @@ TEST_F(HelpersTests1, JacobiSVD_test7) { ////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test8) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto rotation = NDArrayFactory::create('c', {2,2}, {0.2, math::nd4j_sqrt(0.6), -math::nd4j_sqrt(0.6), 0.2}); @@ -1094,9 +664,6 @@ TEST_F(HelpersTests1, JacobiSVD_test8) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test9) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto expS = NDArrayFactory::create('c', {5,1}, {35.7975, 29.1924, 11.1935, 9.2846, 6.77071}); @@ -1113,9 +680,6 @@ TEST_F(HelpersTests1, JacobiSVD_test9) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test10) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); auto expS = NDArrayFactory::create('c', {5,1}, {35.7975, 29.1924, 11.1935, 9.2846, 6.77071}); @@ -1132,9 +696,6 @@ TEST_F(HelpersTests1, JacobiSVD_test10) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test11) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 
,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {36.27, 32.1997, 15.9624, 10.6407, 6.9747}); @@ -1151,9 +712,6 @@ TEST_F(HelpersTests1, JacobiSVD_test11) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test12) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {6,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {36.27, 32.1997, 15.9624, 10.6407, 6.9747}); @@ -1170,9 +728,6 @@ TEST_F(HelpersTests1, JacobiSVD_test12) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test13) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1189,9 +744,6 @@ TEST_F(HelpersTests1, JacobiSVD_test13) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test14) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, {40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1208,9 +760,6 @@ TEST_F(HelpersTests1, JacobiSVD_test14) { /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, JacobiSVD_test15) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix = NDArrayFactory::create('c', {5,6}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0, 3, -11, 2, 12, 10}); auto expS = NDArrayFactory::create('c', {5,1}, 
{40.499, 23.5079, 17.8139, 14.4484, 7.07957}); @@ -1222,13 +771,314 @@ TEST_F(HelpersTests1, JacobiSVD_test15) { ASSERT_TRUE(expS.equalsTo(&jac._s)); } +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, JacobiSVD_test16) { + + NDArray rotation('c', {2,2}, sd::DataType::DOUBLE); + + NDArray exp1('c', {2,2}, {1,0,0,1 }, sd::DataType::DOUBLE); + NDArray exp2('c', {2,2}, {0,1,-1,0}, sd::DataType::DOUBLE); + NDArray exp3('c', {2,2}, {-1,0,0,-1}, sd::DataType::DOUBLE); + NDArray exp4('c', {2,2}, {0.983282, 0.182089, -0.182089, 0.983282}, sd::DataType::DOUBLE); + NDArray exp5('c', {2,2}, {0.249041, 0.968493, -0.968493, 0.249041}, sd::DataType::DOUBLE); + + ops::helpers::JacobiSVD::createJacobiRotationGivens(0, 0, rotation); + ASSERT_TRUE(rotation.equalsTo(exp1)); + ASSERT_TRUE(rotation.isSameShapeStrict(exp1)); + + ops::helpers::JacobiSVD::createJacobiRotationGivens(0, -0.5, rotation); + ASSERT_TRUE(rotation.equalsTo(exp2)); + ASSERT_TRUE(rotation.isSameShapeStrict(exp2)); + + ops::helpers::JacobiSVD::createJacobiRotationGivens(-0.5, 0, rotation); + ASSERT_TRUE(rotation.equalsTo(exp3)); + ASSERT_TRUE(rotation.isSameShapeStrict(exp3)); + + + ops::helpers::JacobiSVD::createJacobiRotationGivens(2.7, -0.5, rotation); + ASSERT_TRUE(rotation.equalsTo(exp4)); + ASSERT_TRUE(rotation.isSameShapeStrict(exp4)); + + ops::helpers::JacobiSVD::createJacobiRotationGivens(2.7, -10.5, rotation); + ASSERT_TRUE(rotation.equalsTo(exp5)); + ASSERT_TRUE(rotation.isSameShapeStrict(exp5)); +} + +TEST_F(HelpersTests1, test_binary_search_1) { + std::array array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + auto idx = sd::ops::helpers::binarySearch(array.data(), 2, 10); + ASSERT_EQ(2, idx); +} + +TEST_F(HelpersTests1, test_binary_search_2) { + std::array array = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + auto idx = sd::ops::helpers::binarySearch(array.data(), 18, 10); + ASSERT_EQ(-1, idx); +} + +/////////////////////////////////////////////////////////////////// 
+TEST_F(HelpersTests1, SVD_test1) { + + auto matrix = NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); + auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); + auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); + auto expU = NDArrayFactory::create('c', {5,5}, {18,3, 2,7,-11, 7, 7.75131,10,-12.5665, -8, 13, 20.905,-4,-14.7979, -9, -17,-3.87565,-7,-19.2608, -8, -9, 9, 6, 14,-11}); + + ops::helpers::SVD svd(matrix, 4, true, true, true, 't'); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(1,1,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test2) { + + auto matrix= NDArrayFactory::create('c', {5,5}, {-17 ,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); + auto matrix2 = NDArrayFactory::create('c', {5,5}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20 ,-4 ,-16 ,-9 ,-17 ,-5 ,-7 ,-19 ,-8 ,-9 ,9 ,6 ,14 ,-11}); + auto expM = NDArrayFactory::create('c', {5,5}, {22.6716,14, 9,-12,-12, 5,-4,-19, -7,-12, 0,16, 0, -6, 8, -10,14,-15, 6,-10, -14,12, -1,-16, 3}); + auto expU = NDArrayFactory::create('c', {5,5}, {-12.1738, 3, -13.4089, 7,-11, 1.36735, 7, -12.1297,-13, -8, -12.3944,20, -5.60173,-16, -9, -17,-5,-7,-19, -8, -9, 9, 6, 14,-11}); + + ops::helpers::SVD svd(matrix, 4, true, true, true); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(0,0,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test3) { + + auto matrix= NDArrayFactory::create('c', {5,5}, {-17 
,14 ,9 ,-12 ,-12 ,5 ,-4 ,-19 ,-7 ,-12 ,15 ,16 ,17 ,-6 ,8 ,-10 ,14 ,-15 ,6 ,-10 ,-14 ,12 ,-1 ,-16 ,3}); + auto matrix2 = NDArrayFactory::create('c', {2,6}, {18 ,3 ,2 ,7 ,-11 ,7 ,7 ,10 ,-13 ,-8 ,13 ,20}); + auto expM = NDArrayFactory::create('c', {5,5}, {-17,14,9,-12,-12, 5,-4, -19, -7,-12, 15,16,17.0294, -6, 8, -10,14, -15, 6,-10, -14,12, 0,-16, 0}); + auto expU = NDArrayFactory::create('c', {2,6}, {18, 2.58377, 2, 7.16409,-11, 7, 7 ,10.4525 ,-13, -7.39897 ,13 ,20}); + + ops::helpers::SVD svd(matrix, 4, false, true, true, 't'); + svd._m = matrix; + svd._u = matrix2; + svd.deflation1(1,1,2,2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test4) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8, 18,-17, 18, -14,-15,8.06226, 2, 2, -3,-18, 0,-17, 2, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16, -20, 13, 20,-10, -9, -1,-20.7138,4.46525, -4, 20, -11, 19,-18.4812,2.72876, 12,-19, 18,-18, 17, -10,-19, 14, -2, -7, -17, -14, -4,-16, 18, -6, -18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-18, -13, 14, 2, -2,-11,2.97683,-7.69015,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + 
svd.deflation2(1, 2, 2, 1, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test5) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-16,-20,13, 20,-10, -9,-15.8359, -7,-12.2566, -4, 20, -11,-1.30158, -5,-26.1401, 12,-19, 18,-19.3068, 17, 7.15871,-19, 14, -2, -7,-17, -14, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 0, 1, 1, 0, 2, 2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test6) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {2,6}, {-10 ,-16 ,-20 
,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto expM = NDArrayFactory::create('c', {6,5}, {18.4391, 20, 19,-18, -6, 3, 6, 2, -7, -7, 0, 8,18.4391,-17, 18, -14,-15, 1, 2, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {2,6}, {-10, -0.542326,-20, 20.6084,20,-10, -9, -15.8359, -7,-12.2566,-4, 20}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19, -7, 1, 2,-1.08465,-13,22.7777, 2, -2,-5.64019, 8,9.65341,-6, -3, -8, 8, -2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, false, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation2(1, 0, 1, 1, 0, 2, 2); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test7) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20, 19,-18, -6, 3, 6, 2, -7, -7, 14, 8,19.6977,-17, 18, -14,-15, 1, 2, 2, -3,-18, 0,-17, 0, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10, -16,-20, 13, 20,-10, -9,-9.03658, -7,-17.8701, -4, 20, -11, 10.0519, -5,-24.1652, 12,-19, 18, -20.51, 17,-1.82762,-19, 14, -2,-12.0826,-17,-9.95039, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = 
NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13,14, 2, -2,-11, 8, 2,-6, -3, -8, 8,-2, 7, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation(1, 3, 1, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test8) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {12 ,20 ,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,-7 ,14 ,8 ,18 ,-17 ,18 ,-14 ,-15 ,1 ,2 ,2 ,-3 ,-18 ,8 ,-17 ,-19 ,12 ,18 ,6 ,-2 ,-17}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expM = NDArrayFactory::create('c', {6,5}, {12, 20,19,-18, -6, 3, 6, 2, -7, -7, 14,-15, 2,-17, 18, -14, 8, 1, 18, 2, -3,-18, 8,-17,-19, 12, 18, 6, -2,-17}); + auto expU = NDArrayFactory::create('c', {6,6}, {-10,-20,-16, 13, 20,-10, -9, -7, -1,-20, -4, 20, -11, -5, 19,-18, 12,-19, 18, 17,-18,-10,-19, 14, -2, -7,-17,-14, -4,-16, 18, -6,-18, 1,-15,-12}); + auto expV = NDArrayFactory::create('c', {5,5}, {-18, 1, 19,-7, 1, 2,-18,-13, 2,14, -2,-11, 8,-6, 2, -3, -8, 8, 7,-2, 16, 15, -3, 7, 0}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + svd.deflation(0, 2, 2, 1, 2, 1); + + ASSERT_TRUE(expM.equalsTo(&svd._m)); + ASSERT_TRUE(expU.equalsTo(&svd._u)); + ASSERT_TRUE(expV.equalsTo(&svd._v)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test9) { + + auto col0 = NDArrayFactory::create('c', {10,1}, {12 ,20 
,19 ,-18 ,-6 ,3 ,6 ,2 ,-7 ,14}); + auto diag = NDArrayFactory::create('c', {10,1}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2}); + auto permut = NDArrayFactory::create('c', {1,10}, {8 ,1 ,4 ,0, 5 ,2 ,9 ,3 ,7 ,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expSingVals = NDArrayFactory::create('c', {10,1}, {-2, 15.304323, 11.2, -1, 1.73489, -12, -15.3043, -12.862, 5.6, 41.4039}); + auto expShifts = NDArrayFactory::create('c', {10,1}, {1, 19, 19, 1, 2, -18, -18, -13, 2, 2}); + auto expMus = NDArrayFactory::create('c', {10,1}, {-3, -3.695677, -7.8, -2, -0.265108, 6, 2.69568, 0.138048, 3.6, 39.4039}); + + auto singVals = NDArrayFactory::create('c', {10,1}); + auto shifts = NDArrayFactory::create('c', {10,1}); + auto mus = NDArrayFactory::create('c', {10,1}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.calcSingVals(col0, diag, permut, singVals, shifts, mus); + + ASSERT_TRUE(expSingVals.equalsTo(&singVals)); + ASSERT_TRUE(expShifts.equalsTo(&shifts)); + ASSERT_TRUE(expMus.equalsTo(&mus)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test10) { + + auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto col0 = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); + auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); + auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); + auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expZhat = NDArrayFactory::create('c', {4,1}, {0, 0.278208, 72.501953, 0}); + + auto zhat = NDArrayFactory::create('c', {4,1}); + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + 
svd.perturb(col0, diag, permut, singVals, shifts, mus, zhat); + + ASSERT_NEAR(expZhat.e(1), zhat.e(1), EPS); + ASSERT_NEAR(expZhat.e(2), zhat.e(2), EPS); +} + + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test11) { + + auto singVals = NDArrayFactory::create('c', {4,1}, {1 ,1 ,1 ,1}); + auto zhat = NDArrayFactory::create('c', {4,1}, {2 ,1 ,2 ,1}); + auto diag = NDArrayFactory::create('c', {4,1}, {5 ,7 ,-13 ,14}); + auto permut = NDArrayFactory::create('c', {1,4}, {0 ,2 ,3 ,1 }); + auto mus = NDArrayFactory::create('c', {4,1}, {4,1,4,6}); + auto shifts = NDArrayFactory::create('c', {4,1}, {4,2,5,6}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + + auto expU = NDArrayFactory::create('c', {5,5}, {-0.662161, 0.980399,-0.791469,-0.748434, 0, -0.744931, 0.183825,-0.593602,-0.392928, 0, 0.0472972, 0.061275,0.0719517, 0.104781, 0, 0.0662161,0.0356509, 0.126635, 0.523904, 0, 0, 0, 0, 0, 1}); + auto expV = NDArrayFactory::create('c', {4,4}, {-0.745259,-0.965209, -0.899497, -0.892319, -0.652102, 0.21114, -0.39353, -0.156156, -0.0768918,-0.130705,-0.0885868,-0.0773343, 0.115929,0.0818966, 0.167906, 0.416415}); + auto U = NDArrayFactory::create('c', {5,5}); + auto V = NDArrayFactory::create('c', {4,4}); + + + ops::helpers::SVD svd(matrix3, 4, true, true, true, 't'); + svd.calcSingVecs(zhat, diag,permut, singVals, shifts, mus, U, V); + + ASSERT_TRUE(expU.equalsTo(&U)); + ASSERT_TRUE(expV.equalsTo(&V)); + +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests1, SVD_test12) { + + auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); + auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 
,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); + auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); + auto matrix4 = NDArrayFactory::create('c', {5,5}, {3 ,-8 ,5 ,7 ,-8 ,4 ,-19 ,-12 ,-4 ,-5 ,-11 ,19 ,-2 ,-7 ,1 ,16 ,-5 ,10 ,19 ,-19 ,0 ,-20 ,0 ,-8 ,-13}); + + auto expSingVals = NDArrayFactory::create('c', {4,1}, {8.43282, 5, 2.3, 1.10167}); + auto expU = NDArrayFactory::create('c', {5,5}, {0.401972,0, 0.206791, 0.891995,0, 0,1, 0, 0,0, 0.816018,0,-0.522818,-0.246529,0, -0.415371,0,-0.826982, 0.378904,0, 0,0, 0, 0,1}); + auto expV = NDArrayFactory::create('c', {4,4}, {-0.951851,0,-0.133555,-0.275939, 0,1, 0, 0, 0.290301,0,-0.681937,-0.671333, -0.098513,0,-0.719114, 0.687873}); + + ops::helpers::SVD svd(matrix4, 4, true, true, true, 't'); + svd._m = matrix1; + svd._u = matrix2; + svd._v = matrix3; + NDArray U, singVals, V; + svd.calcBlockSVD(1, 4, U, singVals, V); + + ASSERT_TRUE(expSingVals.equalsTo(&singVals)); + ASSERT_TRUE(expU.equalsTo(&U)); + ASSERT_TRUE(expV.equalsTo(&V)); + + ASSERT_TRUE(expSingVals.isSameShapeStrict(singVals)); + ASSERT_TRUE(expU.isSameShapeStrict(U)); + ASSERT_TRUE(expV.isSameShapeStrict(V)); +} /////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, SVD_test16) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); @@ -1257,9 +1107,6 @@ TEST_F(HelpersTests1, SVD_test16) { 
/////////////////////////////////////////////////////////////////// TEST_F(HelpersTests1, SVD_test17) { - #ifdef __CUDABLAS__ - return; - #endif auto matrix1 = NDArrayFactory::create('c', {6,5}, {-2 ,-3 ,2 ,1 ,0 ,0 ,-4 ,5 ,-2 ,-3 ,-4 ,0 ,5 ,-1 ,-5 ,-3 ,-5 ,3 ,3 ,3 ,-5 ,5 ,-5 ,0 ,2 ,-2 ,-3 ,-4 ,-5 ,-3}); auto matrix2 = NDArrayFactory::create('c', {6,6}, {-10 ,-16 ,-20 ,13 ,20 ,-10 ,-9 ,-1 ,-7 ,-20 ,-4 ,20 ,-11 ,19 ,-5 ,-18 ,12 ,-19 ,18 ,-18 ,17 ,-10 ,-19 ,14 ,-2 ,-7 ,-17 ,-14 ,-4 ,-16 ,18 ,-6 ,-18 ,1 ,-15 ,-12}); auto matrix3 = NDArrayFactory::create('c', {5,5}, {-18 ,1 ,19 ,-7 ,1 ,2 ,-18 ,-13 ,14 ,2 ,-2 ,-11 ,8 ,2 ,-6 ,-3 ,-8 ,8 ,-2 ,7 ,16 ,15 ,-3 ,7 ,0}); @@ -1893,7 +1740,7 @@ TEST_F(HelpersTests1, OpArgsHolder_test3) { ASSERT_EQ(Status::OK(), results.status()); ASSERT_TRUE(exp.isSameShape(tiled)); ASSERT_TRUE(exp.equalsTo(tiled)); - + OpArgsHolder holderBP = holderFF.createArgsHolderForBP({&gradO}, true); sd::ops::tile_bp opBP; results = opBP.execute(holderBP); @@ -2495,4 +2342,3 @@ TEST_F(HelpersTests1, lstmLayerCell_3) { ASSERT_TRUE(expC.isSameShape(c)); ASSERT_TRUE(expC.equalsTo(c)); } - diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp new file mode 100644 index 000000000..8a0cc28bf --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/HelpersTests2.cpp @@ -0,0 +1,426 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2019 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +#include "testlayers.h" +#include +#include +#include +#include +#include + +using namespace sd; + +class HelpersTests2 : public testing::Test { +public: + + HelpersTests2() { + + std::cout< hess1(x1); + ASSERT_TRUE(hess1._H.isSameShape(&x1)); + ASSERT_TRUE(hess1._H.equalsTo(&x1)); + ASSERT_TRUE(hess1._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess1._Q.equalsTo(&expQ)); + + ops::helpers::Hessenberg hess2(x2); + ASSERT_TRUE(hess2._H.isSameShape(&x2)); + ASSERT_TRUE(hess2._H.equalsTo(&x2)); + ASSERT_TRUE(hess2._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess2._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_2) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expQ('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + // hess._H.printBuffer(); + + ASSERT_TRUE(hess._H.isSameShape(&x)); + ASSERT_TRUE(hess._H.equalsTo(&x)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_3) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expH('c', {3,3}, {33, -23.06939, -48.45414, -57.01061, 12.62845, 3.344058, 0, -9.655942, -5.328448}, sd::DataType::DOUBLE); + NDArray expQ('c', {3,3}, {1,0,0,0, -0.99981, -0.019295, 0, -0.019295,0.99981}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// 
+TEST_F(HelpersTests2, Hessenberg_4) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expH('c', {4,4}, {0.33, 0.4961181, 3.51599, 9.017665, -7.792702, 4.190221, 6.500328, 5.438888, 0, 3.646734, 0.4641911, -7.635502, 0,0, 5.873535, 5.105588}, sd::DataType::DOUBLE); + NDArray expQ('c', {4,4}, {1,0,0,0, 0,-0.171956, 0.336675, -0.925787, 0,-0.973988,0.0826795, 0.210976, 0, 0.147574, 0.937984,0.3137}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Hessenberg_5) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expH('c', {10,10}, {6.9, 6.125208, -8.070945, 7.219828, -9.363308, 2.181236, 5.995414, 3.892612, 4.982657, -2.088574,-12.6412, 1.212547, -6.449684, 5.162879, 0.4341714, -5.278079, -2.624011, -2.03615, 11.39619, -3.034842, + 0, -12.71931, 10.1146, 6.494434, -1.062934, 5.668906, -4.672953, -9.319893, -2.023392, 6.090341,0,0, 7.800521, -1.46286, 1.484626, -10.58252, -3.492978, 2.42187, 5.470045, 1.877265, + 0,0,0, 14.78259,-0.3147726, -5.74874, -0.377823, 3.310056, 2.242614, -5.111574,0,0,0,0, -9.709131, 3.885072, 
6.762626, 4.509144, 2.390195, -4.991013, + 0,0,0,0,0, 8.126269, -12.32529, 9.030151, 1.390931, 0.8634045,0,0,0,0,0,0, -12.99477, 9.574299,-0.3098022, 4.910835,0,0,0,0,0,0,0, 14.75256, 18.95723, -5.054717,0,0,0,0,0,0,0,0, -4.577715, -5.440827,}, sd::DataType::DOUBLE); + NDArray expQ('c', {10,10}, {1,0,0,0,0,0,0,0,0,0,0,-0.0079106,-0.38175,-0.39287,-0.26002,-0.44102,-0.071516,0.12118,0.64392,0.057562, + 0,0.28478,0.0058784,0.3837,-0.47888,0.39477,0.0036847,-0.24678,0.3229,0.47042,0,-0.031643,-0.61277,0.087648,0.12014,0.47648,-0.5288,0.060599,0.021434,-0.30102, + 0,0.23732,-0.17801,-0.31809,-0.31267,0.27595,0.30134,0.64555,-0.33392,0.13363,0,-0.023732,-0.40236,0.43089,-0.38692,-0.5178,-0.03957,-0.081667,-0.47515,-0.0077949, + 0,0.20568,-0.0169,0.36962,0.49669,-0.22475,-0.22199,0.50075,0.10454,0.46112,0,0.41926,0.30243,-0.3714,-0.16795,-0.12969,-0.67572,-0.1205,-0.26047,0.10407, + 0,-0.41135,-0.28357,-0.33858,0.18836,0.083822,-0.0068213,-0.30161,-0.24956,0.66327,0,0.68823,-0.33616,-0.12129,0.36163,-0.063256,0.34198,-0.37564,-0.048196,-0.058948}, sd::DataType::DOUBLE); + + ops::helpers::Hessenberg hess(x); + + ASSERT_TRUE(hess._H.isSameShape(&expH)); + ASSERT_TRUE(hess._H.equalsTo(&expH)); + + ASSERT_TRUE(hess._Q.isSameShape(&expQ)); + ASSERT_TRUE(hess._Q.equalsTo(&expQ)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_1) { + + NDArray x('c', {3,3}, sd::DataType::DOUBLE); + + NDArray expT('c', {3,3}, {-2.5, -2, 1, 0, 1.5, -2, 3, 4, 5}, sd::DataType::DOUBLE); + NDArray expU('c', {3,3}, {0.3, 0.2,-0.1, 0,-0.1, 0.2, -0.3,-0.4, 0.5}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + schur._T.linspace(-3, 1); + schur._U.linspace(-0.3, 0.1); + + schur.splitTwoRows(1, 0.5); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + 
+/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_2) { + + NDArray x('c', {3,3}, sd::DataType::DOUBLE); + + NDArray shift('c', {3}, sd::DataType::DOUBLE); + NDArray exp1('c', {3}, {1,-3,0}, sd::DataType::DOUBLE); + NDArray exp2('c', {3}, {3, 3,-7}, sd::DataType::DOUBLE); + NDArray exp3('c', {3}, {0.964,0.964,0.964}, sd::DataType::DOUBLE); + NDArray exp1T('c', {3,3}, {-3,-2,-1,0,1,2,3,4,5}, sd::DataType::DOUBLE); + NDArray exp2T('c', {3,3}, {-8,-2,-1,0,-4,2,3,4,0}, sd::DataType::DOUBLE); + NDArray exp3T('c', {3,3}, {-9.464102,-2,-1,0,-5.464102,2,3,4,-1.464102,}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + // schur._U.linspace(-0.3, 0.1); // doesn't matter + + schur._T.linspace(-3, 1); + double expShift =0; + schur.calcShift(1, 5, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp1T)); + ASSERT_TRUE(shift.isSameShape(&exp1)); + ASSERT_TRUE(shift.equalsTo(&exp1)); + ASSERT_TRUE(expShift == 0); + + schur._T.linspace(-3, 1); + expShift = 0; + schur.calcShift(2, 10, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp2T)); + ASSERT_TRUE(shift.isSameShape(&exp2)); + ASSERT_TRUE(shift.equalsTo(&exp2)); + ASSERT_TRUE(expShift == 5); + + schur._T.linspace(-3, 1); + expShift = 0; + schur.calcShift(2, 30, expShift, shift); + ASSERT_TRUE(schur._T.equalsTo(&exp3T)); + ASSERT_TRUE(shift.isSameShape(&exp3)); + ASSERT_TRUE(shift.equalsTo(&exp3)); + ASSERT_TRUE((6.4641-0.00001) < expShift && expShift < (6.4641+0.00001)); +} + + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_3) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expU('c', {2,2}, {1,0,0,1}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&x)); + ASSERT_TRUE(schur._T.equalsTo(&x)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + 
+/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_4) { + + NDArray x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expT('c', {3,3}, {53.73337,-20.21406,-50.44809,0,-27.51557, 26.74307,0,0,14.0822}, sd::DataType::DOUBLE); + NDArray expU('c', {3,3}, {-0.5848506, 0.7185352, 0.3763734,-0.7978391,-0.5932709,-0.1071558,-0.1462962, 0.3629555,-0.9202504}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} + +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_5) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expT('c', {4,4}, {6.940177,7.201107,2.523849,-8.534745,-3.109643,5.289615,-2.940507,9.330303, 0,0,-0.1740346, 7.19851,0,0, -2.870214, -1.965758}, sd::DataType::DOUBLE); + NDArray expU('c', {4,4}, {-0.2602141, 0.8077556,-0.3352316,-0.4091935,0.3285353,-0.4395489,-0.4714875,-0.6903338,0.7536921, 0.3005626,-0.3910435, 0.4343908,-0.5062621, -0.252962,-0.7158242, 0.4090287}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} +*/ +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, Schur_6) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 
,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expT('c', {10,10}, {-13.78982, 6.072464, 0.3021194, -8.455495,-0.3047058, 4.033153, 2.610364, 2.80607, -2.735616, 0.3040549,-2.188506, -12.38324, -1.167179, -4.539672, -19.08546, 1.752401,-0.1354974,-0.2747422,-0.3270464, -5.070936, + 0,0,0.5067366, 7.930223,-0.6465996, 8.659522, 1.283713, 4.551415, 12.7736, 3.4812,0,0,-9.858142, -2.905068, -6.474159, -6.247967, 0.4720073, -10.49523, 3.617189, -4.941627, + 0,0,0,0,9.461626, -4.896166, 9.339704, 4.640336, 16.8626, 2.056027,0,0,0,0,6.479812, 8.462862, 7.386285, -4.123457, -5.817095, -2.633641,0,0,0,0,0,0,13.46667, -4.907281, 4.602204, 5.198035, + 0,0,0,0,0,0, 7.176822, 16.93311, 2.195036, 1.346086,0,0,0,0,0,0,0,0, 16.86979, -3.052473,0,0,0,0,0,0,0,0,0, -5.52268}, sd::DataType::DOUBLE); + + // NDArray expT('c', {10,10}, {-13.78982, 6.072464, 0.1926198, -8.458698,-0.3047363, 4.033151, 2.610336, 2.806096, -2.735616, 0.3040549,-2.188506, -12.38324, -1.225857, -4.52418, -19.08548, 1.752257,-0.1354946,-0.2747435,-0.3270464, -5.070936, + // 0,0, 0.4812058, 7.886377,-0.7304318, 8.577898, 1.289673, 4.415163, 12.81936, 3.416929,0,0, -9.901988, -2.879537, -6.465196, -6.359608, 0.455452, -10.55328, 3.451505, -4.986284, + // 0,0,0,0, 9.461614, -4.896159, 9.339602, 4.64046, 16.86265, 2.056047,0,0,0,0, 6.47982, 8.462874, 7.386396, -4.123349, -5.816967, -2.633626, + // 0,0,0,0,0,0, 13.46665, -4.907315, 4.602182, 5.198022,0,0,0,0,0,0, 7.176788, 16.93313, 2.195081, 1.346137,0,0,0,0,0,0,0,0, 16.86979, -3.052473,0,0,0,0,0,0,0,0,0, -5.52268}, sd::DataType::DOUBLE); + + NDArray expU('c', {10,10}, {0.1964177, 0.2165192, -0.2138164, 0.4083154, -0.1872303, -0.5087223, 0.5529025, -0.2996174,-0.08772947, 0.07126534,-0.1906247, -0.223588, 0.3574755, 0.4245914, 
-0.3885589,-0.07328949, -0.4176507, -0.1885168, -0.4476957, 0.1971104, + -0.2219015, 0.3084187, 0.1069209, -0.4905009, -0.3517786, 0.1446875, 0.121738, -0.3772941, 0.1232591, 0.5353205,-0.4766346, 0.6158252, -0.1529085, 0.04780914, 0.1274182, -0.1219211, -0.3123289, -0.2219282,-0.07613826, -0.429201, + 0.2577533, -0.3356205, -0.225358, -0.1540796, 0.3155174, -0.1904664, -0.3567101, -0.6831458, 0.1244646, 0.03383783, -0.45597, -0.3350697, 0.06824276, -0.2861978,-0.06724917, -0.7046481, 0.01664764, 0.2270567, 0.2003283,-0.01544937, + 0.122865, 0.1516775, -0.4446453, -0.2338583, 0.1633447, -0.193498, -0.198088, 0.3170272, -0.5869794, 0.4013553, 0.347383, 0.3666581, 0.6890763,-0.05797414, 0.3630058, -0.319958, -0.1071812, 0.06162044, 0.03171228, 0.1275262, + -0.2986812, 0.05382598, -0.1484276, 0.4936468, 0.362756, 0.05858297, -0.1055183, 0.1090384, 0.4217073, 0.5534347, 0.3864388, 0.2085926, -0.204135, 0.05230855, -0.5290207, -0.1548485, -0.4670302, 0.2205726, 0.4380318,-0.01626632}, sd::DataType::DOUBLE); + + ops::helpers::Schur schur(x); + + ASSERT_TRUE(schur._T.isSameShape(&expT)); + ASSERT_TRUE(schur._T.equalsTo(&expT, 1e-3)); + + ASSERT_TRUE(schur._U.isSameShape(&expU)); + ASSERT_TRUE(schur._U.equalsTo(&expU)); +} +*/ + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_1) { + + NDArray x('c', {2,2}, {1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray expVals('c', {2,2}, {3.25,5.562149, 3.25,-5.562149}, sd::DataType::DOUBLE); + NDArray expVecs('c', {2,2,2}, {-0.3094862,-0.0973726, -0.3094862,0.0973726,0,0.9459053, 0,-0.9459053}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_2) { + + NDArray 
x('c', {3,3}, {33,24,-48,57,12.5,-3,1.1,10,-5.2}, sd::DataType::DOUBLE); + NDArray expVals('c', {3,2}, {53.73337,0, -27.51557,0, 14.0822,0}, sd::DataType::DOUBLE); + NDArray expVecs('c', {3,3,2}, {-0.5848506,0,0.5560778,0,-0.04889745,0,-0.7978391,0,-0.7683444,0,-0.8855156,0,-0.1462962,0,0.3168979,0,-0.4620293,0}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_3) { + + NDArray x('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray expVals('c', {4,2}, {6.114896,4.659591,6.114896,-4.659591, -1.069896,4.45631,-1.069896,-4.45631}, sd::DataType::DOUBLE); + NDArray expVecs('c', {4,4,2}, {-0.2141303,0.4815241,-0.2141303,-0.4815241, 0.1035092,-0.4270603, 0.1035092,0.4270603, 0.2703519,-0.2892722, 0.2703519,0.2892722, -0.5256817,0.044061, -0.5256817,-0.044061, + 0.6202137,0.05521234,0.6202137,-0.05521234, -0.5756007,0.3932209,-0.5756007,-0.3932209,-0.4166034,-0.0651337, -0.4166034,0.0651337, -0.1723716,0.1138941,-0.1723716,-0.1138941}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} + +/* +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, EigenValsAndVecs_4) { + + NDArray x('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 
,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray expVals('c', {10,2}, { -13.08653,3.577011,-13.08653,-3.577011, -1.199166,8.675665,-1.199166,-8.675665,8.962244, + 5.610424, 8.962244,-5.610424, 15.19989,5.675794, 15.19989,-5.675794,16.86979,0,-5.52268,0}, sd::DataType::DOUBLE); + NDArray expVecs('c', {10,10,2}, {0.1652385,0.1439317, 0.1652385,-0.1439317, -0.198272,0.207306, -0.198272,-0.207306, 0.1861466,-0.4599919, 0.1861466,0.4599919, 0.09384053,-0.4889922, 0.09384053,0.4889922, -0.6153314,0, -0.2180209,0, + -0.1603652,-0.1466119, -0.1603652,0.1466119, 0.2817409,0.3301842, 0.2817409,-0.3301842, 0.09747303,-0.2218182, 0.09747303,0.2218182, 0.2318273,-0.3355113, 0.2318273,0.3355113, -0.4828878,0, -0.1451126,0, + -0.1866771,0.1220412, -0.1866771,-0.1220412, 0.08937842,-0.3025104, 0.08937842,0.3025104, 0.2783766,0.2258364, 0.2783766,-0.2258364, -0.1413997,-0.09596012, -0.1413997,0.09596012, -0.2286925,0, 0.3290011,0, + -0.4009741,0.238131, -0.4009741,-0.238131, -0.02772353,0.1338458, -0.02772353,-0.1338458, 0.09030543,-0.2222453, 0.09030543,0.2222453, 0.2565825,-0.2275446, 0.2565825,0.2275446, -0.2855937,0, -0.3950544,0, + 0.2168379,-0.1301121, 0.2168379,0.1301121, -0.165433,-0.1220125, -0.165433,0.1220125, -0.2685605,0.008133055,-0.2685605,-0.008133055, 0.1929395,-0.1194659, 0.1929395,0.1194659, 0.2206467,0, 0.3289105,0, + -0.3835898,-0.2478813, -0.3835898,0.2478813, 0.1923005,-0.01036433, 0.1923005,0.01036433, -0.1711637,-0.3548358, -0.1711637,0.3548358, 0.2888441,0.09625169, 0.2888441,-0.09625169, 0.2595426,0, -0.1288072,0, + 0.1033616,0.09839151, 0.1033616,-0.09839151, -0.3080167,-0.1624564, -0.3080167,0.1624564,-0.03972293,-0.03967309, 
-0.03972293,0.03967309, 0.1965443,0.3025898, 0.1965443,-0.3025898, 0.04587166,0, 0.499261,0, + 0.2922398,0.2461792, 0.2922398,-0.2461792, 0.2769633,-0.2745029, 0.2769633,0.2745029, 0.1034687,-0.002947149, 0.1034687,0.002947149, -0.02611308,0.1658046, -0.02611308,-0.1658046, 0.2351063,0, -0.3787892,0, + -0.2512689,-0.02169855, -0.2512689,0.02169855, -0.01481625,0.4376404, -0.01481625,-0.4376404, -0.2298635,-0.2360671, -0.2298635,0.2360671, 0.11004,-0.1467444, 0.11004,0.1467444, 0.1501568,0, 0.340117,0, + 0.325096,0.1712822, 0.325096,-0.1712822, -0.2412035,-0.09236849, -0.2412035,0.09236849, 0.3894343,-0.08673087, 0.3894343,0.08673087, 0.3125305,0.07128152, 0.3125305,-0.07128152, -0.2415555,0, 0.1841298,0,}, sd::DataType::DOUBLE); + + ops::helpers::EigenValsAndVecs eig(x); + + ASSERT_TRUE(eig._Vals.isSameShape(&expVals)); + ASSERT_TRUE(eig._Vals.equalsTo(&expVals)); + + ASSERT_TRUE(eig._Vecs.isSameShape(&expVecs)); + ASSERT_TRUE(eig._Vecs.equalsTo(&expVecs)); +} +*/ + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_1) { + + NDArray a('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b('c', {4,1}, {-5.,10,9,1}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {4,1}, {0.8527251, -0.2545784, -1.076495, -0.8526268}, sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_2) { + + NDArray a('c', {4,4}, {0.33 ,-7.25 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,7.59 ,3.44 ,2.24 ,-6.82 ,-1.15 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b('c', {4,2}, {-5.,10,9,1,1.5,-2,17,5}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {4,2}, {1.462913, 1.835338, 0.4083664, -2.163816, -3.344481, -3.739225, 0.5156383,0.01624954}, 
sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_3) { + + NDArray a1('c', {4,3}, {0.33 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,2.24 ,-6.82 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray a2('c', {3,4}, {0.33 ,1.71 ,6.20 ,1.34 ,5.38 ,-2.76 ,-8.51 ,2.24 ,-6.82 ,4.80 ,-4.67 ,2.14}, sd::DataType::DOUBLE); + NDArray b1('c', {4,2}, {-5.,10,9,1,1.5,-2,17,5}, sd::DataType::DOUBLE); + NDArray b2('c', {3,2}, {-5.,10,9,1,1.5,-2}, sd::DataType::DOUBLE); + + NDArray expX1('c', {3,2}, {0.9344955,-0.5841325, 0.8768102, 1.029137, -1.098021, 1.360152}, sd::DataType::DOUBLE); + NDArray expX2('c', {4,2}, {0.3536033,0.5270184,0,0,-0.8292221,0.967515,0.01827441,2.856337}, sd::DataType::DOUBLE); + + NDArray x1 = expX1.ulike(); + ops::helpers::FullPivLU::solve(a1,b1,x1); + ASSERT_TRUE(x1.equalsTo(&expX1)); + + NDArray x2 = expX2.ulike(); + ops::helpers::FullPivLU::solve(a2,b2,x2); + ASSERT_TRUE(x2.equalsTo(&expX2)); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(HelpersTests2, fullPivLU_4) { + + NDArray a('c', {10,10}, {6.9 ,4.8 ,9.5 ,3.1 ,6.5 ,5.8 ,-0.9 ,-7.3 ,-8.1 ,3.0 ,0.1 ,9.9 ,-3.2 ,6.4 ,6.2 ,-7.0 ,5.5 ,-2.2 ,-4.0 ,3.7 ,-3.6 ,9.0 ,-1.4 ,-2.4 ,1.7 , + -6.1 ,-4.2 ,-2.5 ,-5.6 ,-0.4 ,0.4 ,9.1 ,-2.1 ,-5.4 ,7.3 ,3.6 ,-1.7 ,-5.7 ,-8.0 ,8.8 ,-3.0 ,-0.5 ,1.1 ,10.0 ,8.0 ,0.8 ,1.0 ,7.5 ,3.5 ,-1.8 , + 0.3 ,-0.6 ,-6.3 ,-4.5 ,-1.1 ,1.8 ,0.6 ,9.6 ,9.2 ,9.7 ,-2.6 ,4.3 ,-3.4 ,0.0 ,-6.7 ,5.0 ,10.5 ,1.5 ,-7.8 ,-4.1 ,-5.3 ,-5.0 ,2.0 ,-4.4 ,-8.4 , + 6.0 ,-9.4 ,-4.8 ,8.2 ,7.8 ,5.2 ,-9.5 ,-3.9 ,0.2 ,6.8 ,5.7 ,-8.5 ,-1.9 ,-0.3 ,7.4 ,-8.7 ,7.2 ,1.3 ,6.3 ,-3.7 ,3.9 ,3.3 ,-6.0 ,-9.1 ,5.9}, sd::DataType::DOUBLE); + NDArray b('c', {10,2}, {-5.,10,9,1,1.5,-2,17,5,3.6,0.12, -3.1,2.27,-0.5,27.3,8.9,5,-7,8,-9,10}, sd::DataType::DOUBLE); + + NDArray x = b.ulike(); + + NDArray expX('c', {10,2}, {-0.697127, 2.58257, 
2.109721,3.160622,-2.217796, -3.275736,-0.5752479, 2.475356,1.996841, -1.928947, + 2.213154,3.541014, 0.7104885, -1.981451,-3.297972,-0.4720612, 3.672657, 0.9161028, -2.322383, -1.784493}, sd::DataType::DOUBLE); + + ops::helpers::FullPivLU::solve(a,b,x); + + ASSERT_TRUE(x.equalsTo(&expX)); +} diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp index 669574fa7..8150976e1 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp @@ -90,6 +90,9 @@ TEST_F(NDArrayTest, NDArrayOrder1) { auto arrayF = new NDArray(arrayC->dup('f')); auto arrayC2 = new NDArray(arrayF->dup('c')); + arrayF->syncToHost(); + arrayC2->syncToHost(); + ASSERT_EQ('c', arrayC->ordering()); ASSERT_EQ('f', arrayF->ordering()); ASSERT_EQ('c', arrayC2->ordering()); diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index 3421edf95..3d0df208f 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -251,7 +251,7 @@ TEST_F(NativeOpsTests, ExecPairwise_2) { auto exp = NDArrayFactory::create('c', {5, 5}); x.assign(true); y.assign(false); - y.t(5) = true; + y.r(5) = true; #ifdef __CUDABLAS__ printf("Unsupported for cuda now.\n"); #else diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index f8086c9fe..f4c8bd2fa 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -1168,6 +1168,529 @@ TEST_F(PlaygroundTests, lstmLayerCellBp_1) { } +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_1) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 3; + + const int dataFormat = 0; // [sL,bS,nIn] + const int directionMode = 0; // forward + const int gateAct 
= 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = false; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, 
argsHolderBP); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_2) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 3; + + const int dataFormat = 1; // [bS,sL,nIn] + const int directionMode = 0; // forward + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = false; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // return whole h {h_0, h_1, ... , h_sL-1}, [sL,bS,nOut] + const auto retLastH = false; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, sL, nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + 
const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, std::vector(), {0., 1.}, GradCheck::LossFunc::MEAN); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_3) { + + const int sL = 4; + const int bS = 3; + const int nIn = 3; + const int nOut = 2; + + const int dataFormat = 2; // [bS, nIn, sL] + const int directionMode = 0; // forward + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {2,0,4}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {bS, nOut}, 
sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_4) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 3; + + const int dataFormat = 1; // [bS,sL,nIn] + const int directionMode = 1; // backward + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = false; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, sL, nIn}, 
sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, sL, nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_5) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 2; // [bS, nIn, sL] + const int directionMode = 1; // backward + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC 
= true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_6) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const 
int dataFormat = 2; // [bS, nIn, sL] + const int directionMode = 2; // bidirectional sum + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS, nIn, sL}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS, nOut, sL}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, 
&b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_7) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 1; // [bS,sL,nIn] + const int directionMode = 3; // bidirectional concat + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step 
+ + const double cellClip = 0.5; // clipping + + NDArray x('c', {bS,sL,nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {bS,sL,2*nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; 
+ sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests13, lstmLayer_bp_8) { + + const int sL = 3; + const int bS = 2; + const int nIn = 2; + const int nOut = 2; + + const int dataFormat = 3; // [sL, bS, nIn] + const int directionMode = 4; // bidirectional extra output dim + const int gateAct = 2; // sigmoid activation for input (i), forget (f) and output (o) gates + const int cellAct = 0; // tanh activation for cell state + const int outAct = 0; // tanh activation for output + + const bool hasBiases = true; // biases array is provided + const bool hasSeqLen = true; // seqLen array is not provided + const auto hasInitH = true; // initial output is provided + const auto hasInitC = true; // initial cell state is provided + const auto hasPH = true; // peephole connections are absent + const auto retFullSeq = true; // dLdh per each time step + const auto retLastH = true; // output at last time step + const auto retLastC = true; // cells state at last time step + + const double cellClip = 0.5; // clipping + + NDArray x('c', {sL, bS, nIn}, sd::DataType::DOUBLE); + NDArray Wx('c', {2, nIn, 4*nOut}, sd::DataType::DOUBLE); + NDArray Wr('c', {2, nOut, 4*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {2, 4*nOut}, sd::DataType::DOUBLE); + NDArray seqLen('c', {bS}, {0,2}, sd::DataType::DOUBLE); + NDArray hI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray cI('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray Wp('c', {2, 3*nOut}, sd::DataType::DOUBLE); + NDArray dLdh('c', {sL, 2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdhL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + NDArray dLdcL('c', {2, bS, nOut}, sd::DataType::DOUBLE); + + x.linspace(-2,0.1); + hI.linspace(-1.5,0.1); + cI.linspace(0.7,-0.1); + Wx.linspace(1,-0.1); + 
Wr.linspace(-1,0.1); + Wp.linspace(0.2,0.2); + b.linspace(1,-0.15); + + std::vector tArgs = {cellClip}; + std::vector iArgs = {dataFormat, directionMode, gateAct, cellAct, outAct}; + std::vector bArgs = {hasBiases, hasSeqLen, hasInitH, hasInitC, hasPH, retFullSeq, retLastH, retLastC}; + + const OpArgsHolder argsHolderFF({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp}, tArgs, iArgs, bArgs); + const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL, &dLdcL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdh}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdhL}, tArgs, iArgs, bArgs); + // const OpArgsHolder argsHolderBP({&x, &Wx, &Wr, &b, &seqLen, &hI, &cI, &Wp, &dLdcL}, tArgs, iArgs, bArgs); + + sd::ops::lstmLayer opFF; + sd::ops::lstmLayer_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP, {true, true, true, true, false, true, true, true}); + + ASSERT_TRUE(isGradCorrect); +} + +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests15, gru_bp_1) { + + const int sL = 3; + const int bS = 2; + const int nIn = 5; + const int nOut = 4; + + + NDArray x('c', {sL, bS, nIn}, {0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. 
, 14.5, 15.}, sd::DataType::DOUBLE); + NDArray hI('c', {bS, nOut}, {-3,-2,-1,0,1,2,3,4}, sd::DataType::DOUBLE); + NDArray Wx('c', {nIn, 3*nOut}, sd::DataType::DOUBLE); + NDArray Wh('c', {nOut, 3*nOut}, sd::DataType::DOUBLE); + NDArray b('c', {3*nOut}, sd::DataType::DOUBLE); + + NDArray dLdh('c', {sL, bS, nOut}, sd::DataType::DOUBLE); + + Wx.linspace(1,-0.1); + Wh.linspace(0.2,0.2); + b.linspace(1,-0.15); + + const OpArgsHolder argsHolderFF({&x, &hI, &Wx, &Wh, &b}, {}, {}); + const OpArgsHolder argsHolderBP({&x, &hI, &Wx, &Wh, &b, &dLdh}, {}, {}); + + sd::ops::gru opFF; + sd::ops::gru_bp opBP; + + const bool isGradCorrect = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP); +} */ diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index c4c1806bd..37facc43c 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -100,7 +100,7 @@ TEST_F(RNGTests, TestGenerator_SGA_1) { for (auto idx = 0; idx < array.lengthOf(); idx++) { float x = generator.relativeT(idx, -sd::DataTypeUtils::template max() / 10, sd::DataTypeUtils::template max() / 10); - array.t(idx) = x; + array.r(idx) = x; } auto minimum = array.reduceNumber(reduce::AMin); minimum.printBuffer("Randomly float min on 1M array"); @@ -285,7 +285,7 @@ TEST_F(RNGTests, Test_Gaussian_21) { ASSERT_NEAR(sd::math::nd4j_abs(mean->e(0)), 0.f, 0.2f); ASSERT_NEAR(variance->e(0), 1.0f, 0.2f); - + } #ifdef DEBUG_BUILD @@ -315,7 +315,7 @@ TEST_F(RNGTests, Test_Gaussian_22) { //variance0->printIndexedBuffer("Variance"); ASSERT_NEAR(sd::math::nd4j_abs(mean0->e(0)), 0.f, 1.0e-3f); ASSERT_NEAR(variance0->e(0), 1.0f, 1.e-3f); - + } TEST_F(RNGTests, Test_Gaussian_3) { @@ -431,7 +431,7 @@ TEST_F(RNGTests, Test_Truncated_21) { // result.at(0)->printBuffer("MEAN"); // result.at(1)->printBuffer("VARIANCE"); - + sd::ops::reduce_min minOp; sd::ops::reduce_max maxOp; @@ -585,7 +585,7 @@ TEST_F(RNGTests, Test_Uniform_2) { 
ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_Uniform_SGA_3) { @@ -614,7 +614,7 @@ TEST_F(RNGTests, Test_Gaussian_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_LogNorm_2) { @@ -634,7 +634,7 @@ TEST_F(RNGTests, Test_LogNorm_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_TruncatedNorm_2) { @@ -653,7 +653,7 @@ TEST_F(RNGTests, Test_TruncatedNorm_2) { ASSERT_TRUE(x1.isSameShape(z)); ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } @@ -674,7 +674,7 @@ TEST_F(RNGTests, Test_Binomial_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } @@ -695,7 +695,7 @@ TEST_F(RNGTests, Test_Bernoulli_2) { ASSERT_TRUE(x1.equalsTo(z)); delete op; - + } TEST_F(RNGTests, Test_GaussianDistribution_1) { @@ -716,7 +716,7 @@ TEST_F(RNGTests, Test_GaussianDistribution_1) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_BernoulliDistribution_1) { @@ -736,7 +736,7 @@ TEST_F(RNGTests, Test_BernoulliDistribution_1) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } @@ -787,7 +787,7 @@ TEST_F(RNGTests, Test_ExponentialDistribution_1_SGA) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_ExponentialDistribution_2_SGA) { @@ -880,7 +880,7 @@ TEST_F(RNGTests, Test_ExponentialDistribution_2) { ASSERT_FALSE(nexp1->equalsTo(z)); ASSERT_FALSE(nexp2->equalsTo(z)); - + } TEST_F(RNGTests, Test_PoissonDistribution_1) { @@ -900,7 +900,7 @@ TEST_F(RNGTests, Test_PoissonDistribution_1) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_1) { @@ -920,7 +920,7 @@ TEST_F(RNGTests, Test_GammaDistribution_1) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_GammaDistribution_2) { @@ -941,7 +941,7 @@ TEST_F(RNGTests, Test_GammaDistribution_2) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } 
TEST_F(RNGTests, Test_GammaDistribution_3) { @@ -962,7 +962,7 @@ TEST_F(RNGTests, Test_GammaDistribution_3) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } TEST_F(RNGTests, Test_UniformDistribution_04) { @@ -980,7 +980,7 @@ TEST_F(RNGTests, Test_UniformDistribution_04) { ASSERT_TRUE(exp0.isSameShape(z)); ASSERT_FALSE(exp0.equalsTo(z)); - + } namespace sd { @@ -1142,7 +1142,7 @@ TEST_F(RNGTests, test_multinomial_1) { ASSERT_EQ(Status::OK(), result.status()); ASSERT_TRUE(expectedZ.isSameShape(outputZ)); ASSERT_TRUE(expectedZ.equalsTo(outputZ)); - + } TEST_F(RNGTests, test_multinomial_2) { @@ -1219,7 +1219,7 @@ TEST_F(RNGTests, test_multinomial_5) { RandomGenerator rng(1234, 1234); ASSERT_EQ(Status::OK(), op.execute(rng, { &probs, &samples }, { &output }, {}, { 1 }, {}, {}, false)); - + auto deviation = output.varianceNumber(variance::SummaryStatsStandardDeviation, false); auto mean = output.meanNumber(); // printf("Var: %f Mean: %f \n", deviation.e(0), mean.e(0)); @@ -1290,7 +1290,7 @@ TEST_F(RNGTests, test_multinomial_6) { ASSERT_NEAR(1.2175, deviation.e(0), 45e-3); // 1000000 35e-3); ASSERT_NEAR(2.906, mean.e(0), 45e-3); // 1000000 35e-3); - + RandomGenerator rng(1234, 1234); NDArray probs('c', { batchValue, ClassValue }, { 1., 1.5, 2., 2.5, 3. 
}, sd::DataType::FLOAT32); diff --git a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt index 7d3073b58..92084ef74 100644 --- a/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt @@ -115,7 +115,7 @@ elseif(WIN32) set(CMAKE_CXX_FLAGS " -g -fPIC -std=c++11 -Wa,-mbig-obj") endif() else() - set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -DLINUX_BUILD=true") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -ffast-math -DFFAST_MATH=true -DLINUX_BUILD=true") if ("${_RELEASE}" OR CMAKE_BUILD_TYPE STREQUAL "Release") message("Release build for tests") @@ -225,6 +225,17 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(MINGW) AND NOT(APPLE)) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() +file(GLOB_RECURSE COMPILATION_UNITS false ../../include/ops/declarable/helpers/cpu/compilation_units/*.cpp.in) +foreach(FL_ITEM ${COMPILATION_UNITS}) + string(REGEX MATCH "^(.*)\\.cpp\.in$" dummy ${FL_ITEM}) + set(FL_ITEM_WLE ${CMAKE_MATCH_1}) + foreach(FL_TYPE_INDEX RANGE 0 9) + #message( "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp") + configure_file( "${FL_ITEM}" "${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp" @ONLY) + LIST(APPEND CUSTOMOPS_GENERIC_SOURCES ${FL_ITEM_WLE}_${FL_TYPE_INDEX}.cpp ) + endforeach() +endforeach() + # this function strips path from file name, basically making up short file name, i.e. file.cpp function(SHORTNAME LONG_NAME OUTPUT)