2019-06-06 14:21:15 +02:00
|
|
|
/*******************************************************************************
|
|
|
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
|
|
|
*
|
|
|
|
* This program and the accompanying materials are made available under the
|
|
|
|
* terms of the Apache License, Version 2.0 which is available at
|
|
|
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
|
|
* License for the specific language governing permissions and limitations
|
|
|
|
* under the License.
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
//
|
|
|
|
// @author Yurii Shyrma (iuriish@yahoo.com), created on 03.01.2018
|
|
|
|
//
|
|
|
|
|
|
|
|
#include <svd.h>
|
|
|
|
#include <jacobiSVD.h>
|
|
|
|
#include <biDiagonalUp.h>
|
|
|
|
#include <array/ResultSet.h>
|
|
|
|
#include <NDArrayFactory.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace nd4j {
|
|
|
|
namespace ops {
|
|
|
|
namespace helpers {
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
SVD<T>::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV ) {
|
|
|
|
|
|
|
|
if(matrix.rankOf() != 2 || matrix.isScalar())
|
|
|
|
throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !");
|
|
|
|
|
|
|
|
const int rows = matrix.sizeAt(0);
|
|
|
|
const int cols = matrix.sizeAt(1);
|
|
|
|
|
|
|
|
if(cols > rows) {
|
|
|
|
|
|
|
|
_transp = true;
|
|
|
|
_diagSize = rows;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
|
|
|
|
_transp = false;
|
|
|
|
_diagSize = cols;
|
|
|
|
}
|
|
|
|
|
|
|
|
_switchSize = switchSize;
|
|
|
|
_calcU = calcU;
|
|
|
|
_calcV = calcV;
|
|
|
|
_fullUV = fullUV;
|
|
|
|
|
|
|
|
if (_transp)
|
|
|
|
math::nd4j_swap<bool>(_calcU, _calcV);
|
|
|
|
|
|
|
|
_s = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize, 1}, matrix.getContext());
|
|
|
|
_m = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext());
|
|
|
|
_m.assign(0.);
|
|
|
|
|
|
|
|
if (_calcU)
|
|
|
|
_u = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext());
|
|
|
|
else
|
|
|
|
_u = NDArrayFactory::create<T>(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext());
|
|
|
|
_u.assign(0.);
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
_v = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext());
|
|
|
|
_v.assign(0.);
|
|
|
|
}
|
|
|
|
|
|
|
|
evalData(matrix);
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
SVD<T>::SVD(const NDArray& matrix, const int switchSize, const bool calcU, const bool calcV, const bool fullUV, const char t) {
|
|
|
|
|
|
|
|
if(matrix.rankOf() != 2 || matrix.isScalar())
|
|
|
|
throw std::runtime_error("ops::helpers::SVD constructor: input array must be 2D matrix !");
|
|
|
|
|
|
|
|
const int rows = matrix.sizeAt(0);
|
|
|
|
const int cols = matrix.sizeAt(1);
|
|
|
|
|
|
|
|
if(cols > rows) {
|
|
|
|
|
|
|
|
_transp = true;
|
|
|
|
_diagSize = rows;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
|
|
|
|
_transp = false;
|
|
|
|
_diagSize = cols;
|
|
|
|
}
|
|
|
|
|
|
|
|
_switchSize = switchSize;
|
|
|
|
_calcU = calcU;
|
|
|
|
_calcV = calcV;
|
|
|
|
_fullUV = fullUV;
|
|
|
|
|
|
|
|
if (_transp)
|
|
|
|
math::nd4j_swap<bool>(_calcU, _calcV);
|
|
|
|
|
|
|
|
_s = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize, 1}, matrix.getContext());
|
|
|
|
_m = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize + 1, _diagSize}, matrix.getContext());
|
|
|
|
_m.assign(0.f);
|
|
|
|
|
|
|
|
if (_calcU)
|
|
|
|
_u = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize + 1, _diagSize + 1}, matrix.getContext());
|
|
|
|
else
|
|
|
|
_u = NDArrayFactory::create<T>(matrix.ordering(), {2, _diagSize + 1}, matrix.getContext());
|
|
|
|
_u.assign(0.);
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
_v = NDArrayFactory::create<T>(matrix.ordering(), {_diagSize, _diagSize}, matrix.getContext());
|
|
|
|
_v.assign(0.);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::deflation1(int col1, int shift, int ind, int size) {
|
|
|
|
|
|
|
|
if(ind <= 0)
|
|
|
|
throw std::runtime_error("ops::helpers::SVD::deflation1 method: input int must satisfy condition ind > 0 !");
|
|
|
|
|
|
|
|
int first = col1 + shift;
|
|
|
|
T cos = _m.e<T>(first, first);
|
|
|
|
T sin = _m.e<T>(first+ind, first);
|
|
|
|
T denom = math::nd4j_sqrt<T, T>(cos*cos + sin*sin);
|
|
|
|
|
|
|
|
if (denom == (T)0.) {
|
|
|
|
|
|
|
|
_m.p(first+ind, first+ind, 0.f);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
cos /= denom;
|
|
|
|
sin /= denom;
|
|
|
|
|
|
|
|
_m.p(first,first, denom);
|
|
|
|
_m.p(first+ind, first, 0.f);
|
|
|
|
_m.p(first+ind, first+ind, 0.f);
|
|
|
|
|
|
|
|
auto rotation = NDArrayFactory::create<T>(_m.ordering(), {2, 2}, _m.getContext());
|
|
|
|
rotation.p(0, 0, cos);
|
|
|
|
rotation.p(0, 1, -sin);
|
|
|
|
rotation.p(1, 0, sin);
|
|
|
|
rotation.p(1, 1, cos);
|
|
|
|
|
|
|
|
if (_calcU) {
|
|
|
|
auto temp = _u({col1,col1+size+1, 0,0}, true);
|
|
|
|
JacobiSVD<T>::mulRotationOnRight(col1, col1+ind, temp, rotation);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
JacobiSVD<T>::mulRotationOnRight(col1, col1+ind, _u, rotation);
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::deflation2(int col1U , int col1M, int row1W, int col1W, int ind1, int ind2, int size) {
|
|
|
|
|
|
|
|
if(ind1 >= ind2)
|
|
|
|
throw std::runtime_error("ops::helpers::SVD::deflation2 method: input intes must satisfy condition ind1 < ind2 !");
|
|
|
|
|
|
|
|
if(size <= 0)
|
|
|
|
throw std::runtime_error("ops::helpers::SVD::deflation2 method: input size must satisfy condition size > 0 !");
|
|
|
|
|
|
|
|
T cos = _m.e<T>(col1M+ind1, col1M);
|
|
|
|
T sin = _m.e<T>(col1M+ind2, col1M);
|
|
|
|
T denom = math::nd4j_sqrt<T,T>(cos*cos + sin*sin);
|
|
|
|
|
|
|
|
if (denom == (T)0.) {
|
|
|
|
|
|
|
|
_m.p(col1M + ind1, col1M + ind1, _m.e<T>(col1M + ind2, col1M + ind2));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
cos /= denom;
|
|
|
|
sin /= denom;
|
|
|
|
_m.p(col1M + ind1, col1M, denom);
|
|
|
|
_m.p(col1M + ind2, col1M + ind2, _m.e<T>(col1M + ind1, col1M + ind1));
|
|
|
|
_m.p(col1M + ind2, col1M, 0.f);
|
|
|
|
|
|
|
|
auto rotation = NDArrayFactory::create<T>(_m.ordering(), {2, 2}, _m.getContext());
|
|
|
|
rotation.p(0,0, cos);
|
|
|
|
rotation.p(1,1, cos);
|
|
|
|
|
|
|
|
rotation.p(0,1, -sin);
|
|
|
|
rotation.p(1,0, sin);
|
|
|
|
|
|
|
|
if (_calcU) {
|
|
|
|
auto temp = _u({col1U,col1U+size+1, 0,0}, true);
|
|
|
|
JacobiSVD<T>::mulRotationOnRight(col1U+ind1, col1U+ind2, temp, rotation);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
JacobiSVD<T>::mulRotationOnRight(col1U+ind1, col1U+ind2, _u, rotation);
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
auto temp = _v({row1W,row1W+size, 0,0}, true);
|
|
|
|
JacobiSVD<T>::mulRotationOnRight(col1W+ind1, col1W+ind2, temp, rotation);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// has effect on block from (col1+shift, col1+shift) to (col2+shift, col2+shift) inclusively
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::deflation(int col1, int col2, int ind, int row1W, int col1W, int shift)
|
|
|
|
{
|
|
|
|
|
|
|
|
const int len = col2 + 1 - col1;
|
|
|
|
|
|
|
|
auto colVec0 = new NDArray(_m({col1+shift,col1+shift+len, col1+shift,col1+shift+1}, true));
|
|
|
|
|
|
|
|
auto diagInterval = _m({col1+shift, col1+shift+len, col1+shift,col1+shift+len}, true).diagonal('c');
|
|
|
|
|
|
|
|
const T almostZero = DataTypeUtils::min<T>();
|
|
|
|
T maxElem;
|
|
|
|
if(len == 1)
|
|
|
|
maxElem = math::nd4j_abs<T>(diagInterval->template e<T>(0));
|
|
|
|
else
|
|
|
|
maxElem = (*diagInterval)({1,-1, 0,0}, true).reduceNumber(reduce::AMax).template e<T>(0);
|
|
|
|
T maxElem0 = colVec0->reduceNumber(reduce::AMax).template e<T>(0);
|
|
|
|
|
|
|
|
T eps = math::nd4j_max<T>(almostZero, DataTypeUtils::eps<T>() * maxElem);
|
|
|
|
T epsBig = (T)8. * DataTypeUtils::eps<T>() * math::nd4j_max<T>(maxElem0, maxElem);
|
|
|
|
|
|
|
|
if(diagInterval->template e<T>(0) < epsBig)
|
|
|
|
diagInterval->p(Nd4jLong(0), epsBig);
|
|
|
|
|
|
|
|
for(int i=1; i < len; ++i)
|
|
|
|
if(math::nd4j_abs<T>(colVec0->template e<T>(i)) < eps)
|
|
|
|
colVec0->p(i, 0.f);
|
|
|
|
|
|
|
|
for(int i=1; i < len; i++)
|
|
|
|
if(diagInterval->template e<T>(i) < epsBig) {
|
|
|
|
deflation1(col1, shift, i, len);
|
|
|
|
for(int i = 0; i < len; ++i)
|
|
|
|
diagInterval->p(i, _m.e<T>(col1+shift+i,col1+shift+i));
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
bool totDefl = true;
|
|
|
|
for(int i=1; i < len; i++)
|
|
|
|
if(colVec0->template e<T>(i) >= almostZero) {
|
|
|
|
totDefl = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
int* permut = nullptr;
|
|
|
|
ALLOCATE(permut, _m.getContext()->getWorkspace(), 3*_diagSize, int);
|
|
|
|
{
|
|
|
|
permut[0] = 0;
|
|
|
|
int p = 1;
|
|
|
|
|
|
|
|
for(int i=1; i<len; ++i)
|
|
|
|
if(math::nd4j_abs<T>(diagInterval->template e<T>(i)) < almostZero)
|
|
|
|
permut[p++] = i;
|
|
|
|
|
|
|
|
int k = 1, m = ind+1;
|
|
|
|
|
|
|
|
for( ; p < len; ++p) {
|
|
|
|
if(k > ind)
|
|
|
|
permut[p] = m++;
|
|
|
|
else if(m >= len)
|
|
|
|
permut[p] = k++;
|
|
|
|
else if(diagInterval->template e<T>(k) < diagInterval->template e<T>(m))
|
|
|
|
permut[p] = m++;
|
|
|
|
else
|
|
|
|
permut[p] = k++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(totDefl) {
|
|
|
|
for(int i=1; i<len; ++i) {
|
|
|
|
int ki = permut[i];
|
|
|
|
if(math::nd4j_abs<T>(diagInterval->template e<T>(ki)) < almostZero || diagInterval->template e<T>(0) < diagInterval->template e<T>(ki))
|
|
|
|
permut[i-1] = permut[i];
|
|
|
|
else {
|
|
|
|
permut[i-1] = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int *tInd = permut + len;
|
|
|
|
int *tCol = permut + 2*len;
|
|
|
|
|
|
|
|
for(int m = 0; m < len; m++) {
|
|
|
|
tCol[m] = m;
|
|
|
|
tInd[m] = m;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(int i = totDefl ? 0 : 1; i < len; i++) {
|
|
|
|
|
|
|
|
const int ki = permut[len - (totDefl ? i+1 : i)];
|
|
|
|
const int jac = tCol[ki];
|
|
|
|
|
|
|
|
T _e0 = diagInterval->template e<T>(jac);
|
|
|
|
//math::nd4j_swap<T>(diagInterval)(i), (*diagInterval)(jac));
|
|
|
|
diagInterval->p(jac, diagInterval->template e<T>(i));
|
|
|
|
diagInterval->p(i, _e0);
|
|
|
|
|
|
|
|
if(i!=0 && jac!=0) {
|
|
|
|
_e0 = colVec0->template e<T>(jac);
|
|
|
|
//math::nd4j_swap<T>((*colVec0)(i), (*colVec0)(jac));
|
|
|
|
colVec0->p(jac, colVec0->template e<T>(i));
|
|
|
|
colVec0->p(i, _e0);
|
|
|
|
}
|
|
|
|
|
|
|
|
NDArray* temp1 = nullptr, *temp2 = nullptr;
|
|
|
|
if (_calcU) {
|
|
|
|
auto temp1 = _u({col1,col1+len+1, col1+i, col1+i+1}, true);
|
|
|
|
auto temp2 = _u({col1,col1+len+1, col1+jac,col1+jac+1}, true);
|
|
|
|
auto temp3 = temp1;
|
|
|
|
temp1.assign(temp2);
|
|
|
|
temp2.assign(temp3);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
auto temp1 = _u({0,2, col1+i, col1+i+1}, true);
|
|
|
|
auto temp2 = _u({0,2, col1+jac, col1+jac+1}, true);
|
|
|
|
auto temp3 = temp1;
|
|
|
|
temp1.assign(temp2);
|
|
|
|
temp2.assign(temp3);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(_calcV) {
|
|
|
|
auto temp1 = _v({row1W,row1W+len, col1W+i, col1W+i+1}, true);
|
|
|
|
auto temp2 = _v({row1W,row1W+len, col1W+jac, col1W+jac+1}, true);
|
|
|
|
auto temp3 = temp1;
|
|
|
|
temp1.assign(temp2);
|
|
|
|
temp2.assign(temp3);
|
|
|
|
}
|
|
|
|
|
|
|
|
const int tI = tInd[i];
|
|
|
|
tCol[tI] = jac;
|
|
|
|
tCol[ki] = i;
|
|
|
|
tInd[jac] = tI;
|
|
|
|
tInd[i] = ki;
|
|
|
|
}
|
|
|
|
|
|
|
|
RELEASE(permut, _m.getContext()->getWorkspace());
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
int i = len-1;
|
|
|
|
|
|
|
|
while(i > 0 && (math::nd4j_abs<T>(diagInterval->template e<T>(i)) < almostZero || math::nd4j_abs<T>(colVec0->template e<T>(i)) < almostZero))
|
|
|
|
--i;
|
|
|
|
|
|
|
|
for(; i > 1; --i) {
|
|
|
|
if( (diagInterval->template e<T>(i) - diagInterval->template e<T>(i-1)) < DataTypeUtils::eps<T>()*maxElem ) {
|
|
|
|
if (math::nd4j_abs<T>(diagInterval->template e<T>(i) - diagInterval->template e<T>(i-1)) >= epsBig)
|
|
|
|
throw std::runtime_error("ops::helpers::SVD::deflation: diagonal elements are not properly sorted !");
|
|
|
|
deflation2(col1, col1 + shift, row1W, col1W, i-1, i, len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
delete colVec0;
|
|
|
|
delete diagInterval;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
T SVD<T>::secularEq(const T diff, const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& diagShifted, const T shift) {
|
|
|
|
|
|
|
|
auto len = permut.lengthOf();
|
|
|
|
T res = 1.;
|
|
|
|
T item;
|
|
|
|
for(int i=0; i<len; ++i) {
|
|
|
|
auto j = permut.e<int>(i);
|
|
|
|
item = col0.e<T>(j) / ((diagShifted.e<T>(j) - diff) * (diag.e<T>(j) + shift + diff));
|
|
|
|
res += item * col0.e<T>(j);
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) {
|
|
|
|
|
|
|
|
auto len = col0.lengthOf();
|
|
|
|
auto curLen = len;
|
|
|
|
|
|
|
|
while(curLen > 1 && col0.e<T>(curLen-1) == (T)0.f)
|
|
|
|
--curLen;
|
|
|
|
|
|
|
|
for (int k = 0; k < len; ++k) {
|
|
|
|
|
|
|
|
if (col0.e<T>(k) == (T)0.f || curLen==1) {
|
|
|
|
|
|
|
|
singVals.p(k, k==0 ? col0.e<T>(0) : diag.e<T>(k));
|
|
|
|
mus.p(k, 0.f);
|
|
|
|
shifts.p(k, k==0 ? col0.e<T>(0) : diag.e<T>(k));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
T left = diag.e<T>(k);
|
|
|
|
T right;
|
|
|
|
|
|
|
|
if(k==curLen-1)
|
|
|
|
right = diag.e<T>(curLen-1) + col0.reduceNumber(reduce::Norm2).e<T>(0);
|
|
|
|
else {
|
|
|
|
|
|
|
|
int l = k+1;
|
|
|
|
while(col0.e<T>(l) == (T)0.f) {
|
|
|
|
++l;
|
|
|
|
if(l >= curLen)
|
|
|
|
throw std::runtime_error("ops::helpers::SVD::calcSingVals method: l >= curLen !");
|
|
|
|
}
|
|
|
|
|
|
|
|
right = diag.e<T>(l);
|
|
|
|
}
|
|
|
|
|
|
|
|
T mid = left + (right - left) / (T)2.;
|
|
|
|
T fMid = secularEq(mid, col0, diag, permut, diag, 0.);
|
|
|
|
T shift = (k == curLen-1 || fMid > (T)0.) ? left : right;
|
|
|
|
|
|
|
|
auto diagShifted = diag - shift;
|
|
|
|
|
|
|
|
T muPrev, muCur;
|
|
|
|
if (shift == left) {
|
|
|
|
muPrev = (right - left) * 0.1;
|
|
|
|
if (k == curLen-1)
|
|
|
|
muCur = right - left;
|
|
|
|
else
|
|
|
|
muCur = (right - left) * 0.5;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
muPrev = -(right - left) * 0.1;
|
|
|
|
muCur = -(right - left) * 0.5;
|
|
|
|
}
|
|
|
|
|
|
|
|
T fPrev = secularEq(muPrev, col0, diag, permut, diagShifted, shift);
|
|
|
|
T fCur = secularEq(muCur, col0, diag, permut, diagShifted, shift);
|
|
|
|
|
|
|
|
if (math::nd4j_abs<T>(fPrev) < math::nd4j_abs<T>(fCur)) {
|
|
|
|
math::nd4j_swap<T>(fPrev, fCur);
|
|
|
|
math::nd4j_swap<T>(muPrev, muCur);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool useBisection = fPrev * fCur > (T)0.;
|
|
|
|
while (fCur != (T).0 &&
|
|
|
|
math::nd4j_abs<T>(muCur - muPrev) > (T)8. * DataTypeUtils::eps<T>() * math::nd4j_max<T>(math::nd4j_abs<T>(muCur), math::nd4j_abs<T>(muPrev))
|
|
|
|
&& math::nd4j_abs<T>(fCur - fPrev) > DataTypeUtils::eps<T>() && !useBisection) {
|
|
|
|
|
|
|
|
T a = (fCur - fPrev) / ((T)1./muCur - (T)1./muPrev);
|
|
|
|
T jac = fCur - a / muCur;
|
|
|
|
T muZero = -a/jac;
|
|
|
|
T fZero = secularEq(muZero, col0, diag, permut, diagShifted, shift);
|
|
|
|
|
|
|
|
muPrev = muCur;
|
|
|
|
fPrev = fCur;
|
|
|
|
muCur = muZero;
|
|
|
|
fCur = fZero;
|
|
|
|
|
|
|
|
if (shift == left && (muCur < (T)0. || muCur > right - left))
|
|
|
|
useBisection = true;
|
|
|
|
if (shift == right && (muCur < -(right - left) || muCur > (T)0.))
|
|
|
|
useBisection = true;
|
|
|
|
if (math::nd4j_abs<T>(fCur) > math::nd4j_abs<T>(fPrev))
|
|
|
|
useBisection = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (useBisection) {
|
|
|
|
|
|
|
|
T leftShifted, rightShifted;
|
|
|
|
if (shift == left) {
|
|
|
|
leftShifted = DataTypeUtils::min<T>();
|
|
|
|
rightShifted = (k==curLen-1) ? right : ((right - left) * (T)0.6);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
|
|
|
|
leftShifted = -(right - left) * (T)0.6;
|
|
|
|
rightShifted = -DataTypeUtils::min<T>();
|
|
|
|
}
|
|
|
|
|
|
|
|
T fLeft = secularEq(leftShifted, col0, diag, permut, diagShifted, shift);
|
|
|
|
T fRight = secularEq(rightShifted, col0, diag, permut, diagShifted, shift);
|
|
|
|
// if(fLeft * fRight >= (T)0.)
|
|
|
|
// throw "ops::helpers::SVD::calcSingVals method: fLeft * fRight >= (T)0. !";
|
|
|
|
|
|
|
|
while (rightShifted - leftShifted > (T)2.f * DataTypeUtils::eps<T>() * math::nd4j_max<T>(math::nd4j_abs<T>(leftShifted), math::nd4j_abs<T>(rightShifted))) {
|
|
|
|
|
|
|
|
T midShifted = (leftShifted + rightShifted) / (T)2.;
|
|
|
|
fMid = secularEq(midShifted, col0, diag, permut, diagShifted, shift);
|
|
|
|
if (fLeft * fMid < (T)0.)
|
|
|
|
rightShifted = midShifted;
|
|
|
|
else {
|
|
|
|
leftShifted = midShifted;
|
|
|
|
fLeft = fMid;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
muCur = (leftShifted + rightShifted) / (T)2.;
|
|
|
|
}
|
|
|
|
singVals.p(k, shift + muCur);
|
|
|
|
shifts.p(k, shift);
|
|
|
|
mus.p(k, muCur);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::perturb(const NDArray& col0, const NDArray& diag, const NDArray& permut, const NDArray& singVals, const NDArray& shifts, const NDArray& mus, NDArray& zhat) {
|
|
|
|
|
|
|
|
int n = col0.lengthOf();
|
|
|
|
int m = permut.lengthOf();
|
|
|
|
if(m==0) {
|
|
|
|
zhat.assign(0.);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
int last = permut.e<int>(m-1);
|
|
|
|
|
|
|
|
for (int k = 0; k < n; ++k) {
|
|
|
|
|
|
|
|
if (col0.e<T>(k) == (T)0.f)
|
|
|
|
zhat.p(k, (T)0.f);
|
|
|
|
else {
|
|
|
|
T dk = diag.e<T>(k);
|
|
|
|
T prod = (singVals.e<T>(last) + dk) * (mus.e<T>(last) + (shifts.e<T>(last) - dk));
|
|
|
|
|
|
|
|
for(int l = 0; l<m; ++l) {
|
|
|
|
int i = permut.e<int>(l);
|
|
|
|
if(i!=k) {
|
|
|
|
int j = i<k ? i : permut.e<int>(l-1);
|
|
|
|
prod *= ((singVals.e<T>(j)+dk) / ((diag.e<T>(i)+dk))) * ((mus.e<T>(j)+(shifts.e<T>(j)-dk)) / ((diag.e<T>(i)-dk)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
T tmp = math::nd4j_sqrt<T,T>(prod);
|
|
|
|
zhat.p(k, col0.e<T>(k) > (T)0.f ? tmp : -tmp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::calcSingVecs(const NDArray& zhat, const NDArray& diag, const NDArray& perm, const NDArray& singVals,
|
|
|
|
const NDArray& shifts, const NDArray& mus, NDArray& U, NDArray& V) {
|
|
|
|
|
|
|
|
int n = zhat.lengthOf();
|
|
|
|
int m = perm.lengthOf();
|
|
|
|
|
|
|
|
for (int k = 0; k < n; ++k) {
|
|
|
|
|
|
|
|
auto colU = new NDArray(U({0,0, k,k+1}, true));
|
|
|
|
*colU = 0.;
|
|
|
|
NDArray* colV = nullptr;
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
colV = new NDArray(V({0,0, k,k+1}, true));
|
|
|
|
*colV = 0.;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (zhat.e<T>(k) == (T)0.f) {
|
|
|
|
colU->p(k, 1.f);
|
|
|
|
|
|
|
|
if (_calcV)
|
|
|
|
colV->p(k, 1.f);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
|
|
|
|
for(int l = 0; l < m; ++l) {
|
|
|
|
int i = perm.e<int>(l);
|
|
|
|
U.p(i,k, zhat.e<T>(i)/(((diag.e<T>(i) - shifts.e<T>(k)) - mus.e<T>(k)) )/( (diag.e<T>(i) + singVals.e<T>(k))));
|
|
|
|
}
|
|
|
|
U.p(n,k, 0.f);
|
|
|
|
*colU /= colU->reduceNumber(reduce::Norm2);
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
|
|
|
|
for(int l = 1; l < m; ++l){
|
|
|
|
int i = perm.e<T>(l);
|
|
|
|
V.p(i,k, diag.e<T>(i) * zhat.e<T>(i) / (((diag.e<T>(i) - shifts.e<T>(k)) - mus.e<T>(k)) )/( (diag.e<T>(i) + singVals.e<T>(k))));
|
|
|
|
}
|
|
|
|
V.p(0,k, -1.f);
|
|
|
|
*colV /= colV->reduceNumber(reduce::Norm2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
delete colU;
|
|
|
|
if (_calcV)
|
|
|
|
delete colV;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto colU = U({0,0, n,n+1}, true);
|
|
|
|
colU = 0.;
|
|
|
|
colU.p(n, 1.);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::calcBlockSVD(int col1, int size, NDArray& U, NDArray& singVals, NDArray& V) {
|
|
|
|
|
|
|
|
const T almostZero = DataTypeUtils::min<T>();
|
|
|
|
auto col0 = _m({col1, col1+size, col1, col1+1}, true);
|
|
|
|
auto diagP = _m({col1, col1+size, col1, col1+size}, true).diagonal('c');
|
|
|
|
auto diag = *diagP;
|
|
|
|
delete diagP;
|
|
|
|
|
|
|
|
diag.p(Nd4jLong(0), T(0));
|
|
|
|
singVals = NDArrayFactory::create<T>(_m.ordering(), {size, 1}, _m.getContext());
|
|
|
|
U = NDArrayFactory::create<T>(_u.ordering(), {size+1, size+1}, _u.getContext());
|
|
|
|
if (_calcV)
|
|
|
|
V = NDArrayFactory::create<T>(_v.ordering(), {size, size}, _v.getContext());
|
|
|
|
|
|
|
|
int curSize = size;
|
|
|
|
while(curSize > 1 && diag.template e<T>(curSize-1) == (T)0.f)
|
|
|
|
--curSize;
|
|
|
|
|
|
|
|
int m = 0;
|
|
|
|
std::vector<T> indices;
|
|
|
|
for(int k = 0; k < curSize; ++k)
|
|
|
|
if(math::nd4j_abs<T>(col0.template e<T>(k)) > almostZero)
|
|
|
|
indices.push_back((T)k);
|
|
|
|
|
|
|
|
auto permut = NDArrayFactory::create<T>(_m.ordering(), {1, (int)indices.size()}, indices, _m.getContext());
|
|
|
|
auto shifts = NDArrayFactory::create<T>(_m.ordering(), {size, 1}, _m.getContext());
|
|
|
|
auto mus = NDArrayFactory::create<T>(_m.ordering(), {size, 1}, _m.getContext());
|
|
|
|
auto zhat = NDArrayFactory::create<T>(_m.ordering(), {size, 1}, _m.getContext());
|
|
|
|
|
|
|
|
calcSingVals(col0, diag, permut, singVals, shifts, mus);
|
|
|
|
perturb(col0, diag, permut, singVals, shifts, mus, zhat);
|
|
|
|
calcSingVecs(zhat, diag, permut, singVals, shifts, mus, U, V);
|
|
|
|
|
|
|
|
for(int i=0; i<curSize-1; ++i) {
|
|
|
|
|
|
|
|
if(singVals.e<T>(i) > singVals.e<T>(i+1)) {
|
|
|
|
T _e0 = singVals.e<T>(i);
|
|
|
|
T _e1 = singVals.e<T>(i+1);
|
|
|
|
//math::nd4j_swap<T>(singVals(i),singVals(i+1));
|
|
|
|
singVals.p(i, _e1);
|
|
|
|
singVals.p(i+1, _e0);
|
|
|
|
|
|
|
|
auto temp1 = U({0,0, i,i+1}, true);
|
|
|
|
auto temp2 = U({0,0, i+1,i+2}, true);
|
|
|
|
auto temp3 = temp1;
|
|
|
|
temp1.assign(temp2);
|
|
|
|
temp2.assign(temp3);
|
|
|
|
|
|
|
|
if(_calcV) {
|
|
|
|
auto temp1 = V({0,0, i,i+1}, true);
|
|
|
|
auto temp2 = V({0,0, i+1,i+2}, true);
|
|
|
|
auto temp3 = temp1;
|
|
|
|
temp1.assign(temp2);
|
|
|
|
temp2.assign(temp3);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
auto temp1 = singVals({0,curSize, 0,0}, true);
|
|
|
|
for (int e = 0; e < curSize / 2; ++e) {
|
|
|
|
T tmp = temp1.e<T>(e);
|
|
|
|
temp1.p(e, temp1.e<T>(curSize-1-e));
|
|
|
|
temp1.p(curSize-1-e, tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto temp2 = U({0,0, 0,curSize}, true);
|
|
|
|
for(int i = 0; i < curSize/2; ++i) {
|
|
|
|
auto temp3 = temp2({0,0, i,i+1}, true);
|
|
|
|
auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true);
|
|
|
|
auto temp5 = temp3;
|
|
|
|
temp3.assign(temp4);
|
|
|
|
temp4.assign(temp5);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
auto temp2 = V({0,0, 0,curSize}, true);
|
|
|
|
for(int i = 0; i < curSize/2; ++i) {
|
|
|
|
auto temp3 = temp2({0,0, i,i+1}, true);
|
|
|
|
auto temp4 = temp2({0,0, curSize-1-i,curSize-i}, true);
|
|
|
|
auto temp5 = temp3;
|
|
|
|
temp3.assign(temp4);
|
|
|
|
temp4.assign(temp5);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template<typename T>
|
|
|
|
void SVD<T>::DivideAndConquer(int col1, int col2, int row1W, int col1W, int shift) {
|
|
|
|
|
|
|
|
// requires rows = cols + 1;
|
|
|
|
const int n = col2 - col1 + 1;
|
|
|
|
const int k = n/2;
|
|
|
|
const T almostZero = DataTypeUtils::min<T>();
|
|
|
|
T alphaK;
|
|
|
|
T betaK;
|
|
|
|
T r0;
|
|
|
|
T lambda, phi, c0, s0;
|
|
|
|
auto l = NDArrayFactory::create<T>(_u.ordering(), {1, k}, _u.getContext());
|
|
|
|
auto f = NDArrayFactory::create<T>(_u.ordering(), {1, n-k-1}, _u.getContext());
|
|
|
|
|
|
|
|
if(n < _switchSize) {
|
|
|
|
|
|
|
|
JacobiSVD<T> jac(_m({col1,col1+n+1, col1,col1+n}, true), _calcU, _calcV, _fullUV);
|
|
|
|
|
|
|
|
if (_calcU) {
|
|
|
|
auto temp = _u({col1,col1+n+1, col1,col1+n+1}, true);
|
|
|
|
temp.assign(jac._u);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
auto temp1 = _u({0,1, col1,col1+n+1}, true);
|
|
|
|
temp1.assign(jac._u({0,1, 0,0}, true));
|
|
|
|
auto temp2 = _u({1,2, col1,col1+n+1}, true);
|
|
|
|
temp2.assign(jac._u({n,n+1, 0,0}, true));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
auto temp = _v({row1W,row1W+n, col1W,col1W+n}, true);
|
|
|
|
temp.assign(jac._v);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto temp = _m({col1+shift,col1+shift+n+1, col1+shift,col1+shift+n}, true);
|
|
|
|
temp.assign(0.);
|
|
|
|
auto diag = _m.diagonal('c');
|
|
|
|
(*diag)({col1+shift, col1+shift+n, 0,0}, true).assign(jac._s({0,n, 0,0}, true));
|
|
|
|
delete diag;
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
alphaK = _m.e<T>(col1 + k, col1 + k);
|
|
|
|
betaK = _m.e<T>(col1 + k + 1, col1 + k);
|
|
|
|
|
|
|
|
DivideAndConquer(k + 1 + col1, col2, k + 1 + row1W, k + 1 + col1W, shift);
|
|
|
|
DivideAndConquer(col1, k - 1 + col1, row1W, col1W + 1, shift + 1);
|
|
|
|
|
|
|
|
if (_calcU) {
|
|
|
|
lambda = _u.e<T>(col1 + k, col1 + k);
|
|
|
|
phi = _u.e<T>(col1 + k + 1, col2 + 1);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
lambda = _u.e<T>(1, col1 + k);
|
|
|
|
phi = _u.e<T>(0, col2 + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
r0 = math::nd4j_sqrt<T, T>((math::nd4j_abs<T>(alphaK * lambda) * math::nd4j_abs<T>(alphaK * lambda)) + math::nd4j_abs<T>(betaK * phi) * math::nd4j_abs<T>(betaK * phi));
|
|
|
|
|
|
|
|
if(_calcU) {
|
|
|
|
l.assign(_u({col1+k, col1+k+1, col1,col1+k}, true));
|
|
|
|
f.assign(_u({col1+k+1,col1+k+2, col1+k+1,col1+n}, true));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
l.assign(_u({1,2, col1, col1+k}, true));
|
|
|
|
f.assign(_u({0,1, col1+k+1, col1+n}, true));
|
|
|
|
}
|
|
|
|
|
|
|
|
// UofSVD.printIndexedBuffer();
|
|
|
|
// VofSVD.printIndexedBuffer();
|
|
|
|
// singVals.printIndexedBuffer();
|
|
|
|
// printf("!! \n");
|
|
|
|
|
|
|
|
if (_calcV)
|
|
|
|
_v.p(row1W+k, col1W, 1.f);
|
|
|
|
|
|
|
|
if (r0 < almostZero){
|
|
|
|
c0 = 1.;
|
|
|
|
s0 = 0.;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
c0 = alphaK * lambda / r0;
|
|
|
|
s0 = betaK * phi / r0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_calcU) {
|
|
|
|
|
|
|
|
auto temp = _u({col1,col1+k+1, col1+k,col1+k+1}, true);
|
|
|
|
NDArray q1(temp);
|
|
|
|
|
|
|
|
for (int i = col1 + k - 1; i >= col1; --i) {
|
|
|
|
auto temp = _u({col1,col1+k+1, i+1,i+2}, true);
|
|
|
|
temp.assign(_u({col1, col1+k+1, i, i+1}, true));
|
|
|
|
}
|
|
|
|
|
|
|
|
auto temp1 = _u({col1,col1+k+1, col1,col1+1}, true);
|
|
|
|
temp1.assign(q1 * c0);
|
|
|
|
auto temp2 = _u({col1,col1+k+1, col2+1,col2+2}, true);
|
|
|
|
temp2.assign(q1 * (-s0));
|
|
|
|
auto temp3 = _u({col1+k+1,col1+n+1, col1, col1+1}, true);
|
|
|
|
temp3.assign(_u({col1+k+1, col1+n+1, col2+1, col2+2}, true) * s0);
|
|
|
|
auto temp4 =_u({col1+k+1,col1+n+1, col2+1,col2+2}, true);
|
|
|
|
temp4 *= c0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
|
|
|
|
T q1 = _u.e<T>(0, col1 + k);
|
|
|
|
|
|
|
|
for (int i = col1 + k - 1; i >= col1; --i)
|
|
|
|
_u.p(0, i+1, _u.e<T>(0, i));
|
|
|
|
|
|
|
|
_u.p(0, col1, q1 * c0);
|
|
|
|
_u.p(0, col2+1, -q1*s0);
|
|
|
|
_u.p(1, col1, _u.e<T>(1, col2+1) * s0);
|
|
|
|
_u.p(1, col2 + 1, _u.e<T>(1, col2 + 1) * c0);
|
|
|
|
_u({1,2, col1+1, col1+k+1}, true) = 0.f;
|
|
|
|
_u({0,1, col1+k+1, col1+n}, true) = 0.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
_m.p(col1 + shift, col1 + shift, r0);
|
|
|
|
auto temp1 = _m({col1+shift+1,col1+shift+k+1, col1+shift,col1+shift+1}, true);
|
|
|
|
temp1.assign(l*alphaK);
|
|
|
|
auto temp2 = _m({col1+shift+k+1,col1+shift+n, col1+shift,col1+shift+1}, true);
|
|
|
|
temp2.assign(f*betaK);
|
|
|
|
|
|
|
|
deflation(col1, col2, k, row1W, col1W, shift);
|
|
|
|
|
|
|
|
NDArray UofSVD, VofSVD, singVals;
|
|
|
|
calcBlockSVD(col1 + shift, n, UofSVD, singVals, VofSVD);
|
|
|
|
|
|
|
|
if(_calcU) {
|
|
|
|
auto pTemp = _u({col1, col1+n+1, col1,col1+n+1}, true);
|
|
|
|
auto temp = pTemp;
|
|
|
|
pTemp.assign(mmul(temp, UofSVD));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
auto pTemp = _u({0,0, col1,col1+n+1}, true);
|
|
|
|
auto temp = pTemp;
|
|
|
|
pTemp.assign(mmul(temp, UofSVD));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
auto pTemp = _v({row1W,row1W+n, row1W,row1W+n}, true);
|
|
|
|
auto temp = pTemp;
|
|
|
|
pTemp.assign(mmul(temp, VofSVD));
|
|
|
|
}
|
|
|
|
|
|
|
|
auto blockM = _m({col1+shift,col1+shift+n, col1+shift,col1+shift+n}, true);
|
|
|
|
blockM = 0.f;
|
|
|
|
auto diag = blockM.diagonal('c');
|
|
|
|
diag->assign(singVals);
|
|
|
|
delete diag;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template<typename T>
|
|
|
|
void SVD<T>::exchangeUV(const HHsequence& hhU, const HHsequence& hhV, const NDArray& U, const NDArray& V) {
|
|
|
|
|
|
|
|
if (_calcU) {
|
|
|
|
|
|
|
|
int colsU = _fullUV ? hhU.rows() : _diagSize;
|
|
|
|
auto temp1 = NDArrayFactory::create<T>(_u.ordering(), {hhU.rows(), colsU}, _u.getContext());
|
|
|
|
temp1.setIdentity();
|
|
|
|
_u = temp1;
|
|
|
|
|
|
|
|
auto temp2 = _u({0,_diagSize, 0,_diagSize}, true);
|
|
|
|
temp2.assign(V({0,_diagSize, 0,_diagSize}, true));
|
|
|
|
const_cast<HHsequence&>(hhU).mulLeft(_u);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_calcV) {
|
|
|
|
|
|
|
|
int colsV = _fullUV ? hhV.rows() : _diagSize;
|
|
|
|
auto temp1 = NDArrayFactory::create<T>(_v.ordering(), {hhV.rows(), colsV}, _v.getContext());
|
|
|
|
temp1.setIdentity();
|
|
|
|
_v = temp1;
|
|
|
|
|
|
|
|
auto temp2 = _v({0,_diagSize, 0,_diagSize}, true);
|
|
|
|
temp2.assign(U({0,_diagSize, 0,_diagSize}, true));
|
|
|
|
const_cast<HHsequence&>(hhV).mulLeft(_v);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename T>
|
|
|
|
void SVD<T>::evalData(const NDArray& matrix) {
|
|
|
|
|
|
|
|
const T almostZero = DataTypeUtils::min<T>();
|
|
|
|
|
|
|
|
if(matrix.sizeAt(1) < _switchSize) {
|
|
|
|
|
|
|
|
JacobiSVD<T> jac(matrix, _calcU, _calcV, _fullUV);
|
|
|
|
|
|
|
|
if(_calcU)
|
|
|
|
_u = jac._u;
|
|
|
|
if(_calcV)
|
|
|
|
_v = jac._v;
|
|
|
|
|
|
|
|
_s.assign(jac._s);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
T scale = matrix.reduceNumber(reduce::AMax).e<T>(0);
|
|
|
|
|
|
|
|
if(scale == (T)0.)
|
|
|
|
scale = 1.;
|
|
|
|
|
|
|
|
NDArray copy;
|
|
|
|
if(_transp) {
|
|
|
|
copy = NDArrayFactory::create<T>(matrix.ordering(), {matrix.sizeAt(1), matrix.sizeAt(0)}, matrix.getContext());
|
|
|
|
for(int i = 0; i < copy.sizeAt(0); ++i)
|
|
|
|
for(int j = 0; j < copy.sizeAt(1); ++j)
|
|
|
|
copy.p<T>(i, j, matrix.e<T>(j,i) / scale);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
copy = matrix / scale;
|
|
|
|
|
|
|
|
BiDiagonalUp biDiag(copy);
|
|
|
|
|
|
|
|
_u = 0.;
|
|
|
|
_v = 0.;
|
|
|
|
|
|
|
|
auto temp1 = biDiag._HHbidiag.transpose();
|
|
|
|
auto temp2 = _m({0,_diagSize, 0,0}, true);
|
|
|
|
temp2.assign(temp1);
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
|
2019-06-06 14:21:15 +02:00
|
|
|
|
|
|
|
auto temp3 = _m({_m.sizeAt(0)-1,_m.sizeAt(0), 0,0}, true);
|
|
|
|
temp3.assign(0.);
|
|
|
|
|
|
|
|
DivideAndConquer(0, _diagSize - 1, 0, 0, 0);
|
|
|
|
|
|
|
|
for (int i = 0; i < _diagSize; ++i) {
|
|
|
|
T a = math::nd4j_abs<T>(_m.e<T>(i, i));
|
|
|
|
_s.p(i, a * scale);
|
|
|
|
if (a < almostZero) {
|
|
|
|
auto temp = _s({i+1,_diagSize, 0,0}, true);
|
|
|
|
temp.assign(0.);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (i == _diagSize-1)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(_transp)
|
|
|
|
exchangeUV(biDiag.makeHHsequence('v'), biDiag.makeHHsequence('u'), _v, _u);
|
|
|
|
else
|
|
|
|
exchangeUV(biDiag.makeHHsequence('u'), biDiag.makeHHsequence('v'), _u, _v);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT SVD,,FLOAT_TYPES);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|