/* ****************************************************************************** * * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ // // @author Yurii Shyrma (iuriish@yahoo.com) // #include #include #include #include namespace sd { namespace ops { namespace helpers { ////////////////////////////////////////////////////////////////////////// template Hessenberg::Hessenberg(const NDArray& matrix) { if(matrix.rankOf() != 2) throw std::runtime_error("ops::helpers::Hessenberg constructor: input matrix must be 2D !"); if(matrix.sizeAt(0) == 1) { _Q = NDArray(matrix.ordering(), {1,1}, matrix.dataType(), matrix.getContext()); _Q = 1; _H = matrix.dup(); return; } if(matrix.sizeAt(0) != matrix.sizeAt(1)) throw std::runtime_error("ops::helpers::Hessenberg constructor: input array must be 2D square matrix !"); _H = matrix.dup(); _Q = matrix.ulike(); evalData(); } ////////////////////////////////////////////////////////////////////////// template void Hessenberg::evalData() { const int rows = _H.sizeAt(0); NDArray hhCoeffs(_H.ordering(), {rows - 1}, _H.dataType(), _H.getContext()); // calculate _H for(uint i = 0; i < rows - 1; ++i) { T coeff, norm; NDArray tail1 = _H({i+1,-1, i,i+1}); NDArray tail2 = _H({i+2,-1, i,i+1}, true); Householder::evalHHmatrixDataI(tail1, coeff, norm); _H({0,0, i,i+1}). template r(i+1) = norm; hhCoeffs. template r(i) = coeff; NDArray bottomRightCorner = _H({i+1,-1, i+1,-1}, true); Householder::mulLeft(bottomRightCorner, tail2, coeff); NDArray rightCols = _H({0,0, i+1,-1}, true); Householder::mulRight(rightCols, tail2.transpose(), coeff); } // calculate _Q HHsequence hhSeq(_H, hhCoeffs, 'u'); hhSeq._diagSize = rows - 1; hhSeq._shift = 1; hhSeq.applyTo_(_Q); // fill down with zeros starting at first subdiagonal _H.fillAsTriangular(0, -1, 0, _H, 'l'); } ////////////////////////////////////////////////////////////////////////// template Schur::Schur(const NDArray& matrix) { if(matrix.rankOf() != 2) throw std::runtime_error("ops::helpers::Schur constructor: input matrix must be 2D !"); if(matrix.sizeAt(0) != matrix.sizeAt(1)) throw std::runtime_error("ops::helpers::Schur constructor: input array must be 2D square matrix !"); evalData(matrix); } ////////////////////////////////////////////////////////////////////////// template void Schur::evalData(const NDArray& matrix) { const T scale = matrix.reduceNumber(reduce::AMax).template t(0); const T almostZero = DataTypeUtils::min(); if(scale < DataTypeUtils::min()) { _T = matrix.ulike(); _U = matrix.ulike(); _T.nullify(); _U.setIdentity(); return; } // perform Hessenberg decomposition Hessenberg hess(matrix / scale); _T = std::move(hess._H); _U = std::move(hess._Q); calcFromHessenberg(); _T *= scale; } ////////////////////////////////////////////////////////////////////////// template void Schur::splitTwoRows(const int ind, const T shift) { const int numCols = _T.sizeAt(1); T p = (T)0.5 * (_T.t(ind-1, ind-1) - _T.t(ind, ind)); T q = p*p + _T.t(ind, ind-1) * _T.t(ind-1, ind); _T.r(ind, ind) += shift; _T.r(ind-1, ind-1) += shift; if (q >= (T)0) { T z = math::nd4j_sqrt(math::nd4j_abs(q)); NDArray rotation(_T.ordering(), {2, 2}, _T.dataType(), _T.getContext()); if (p >= (T)0) JacobiSVD::createJacobiRotationGivens(p+z, _T.t(ind, ind-1), rotation); else JacobiSVD::createJacobiRotationGivens(p-z, _T.t(ind, ind-1), rotation); NDArray rightCols = _T({0,0, ind-1,-1}); JacobiSVD::mulRotationOnLeft(ind-1, ind, rightCols, rotation.transpose()); NDArray topRows = _T({0,ind+1, 0,0}); JacobiSVD::mulRotationOnRight(ind-1, ind, topRows, rotation); JacobiSVD::mulRotationOnRight(ind-1, ind, _U, rotation); _T.r(ind, ind-1) = (T)0; } if (ind > 1) _T.r(ind-1, ind-2) = (T)0; } ////////////////////////////////////////////////////////////////////////// template void Schur::calcShift(const int ind, const int iter, T& shift, NDArray& shiftVec) { // shiftVec has length = 3 shiftVec.r(0) = _T.t(ind, ind); shiftVec.r(1) = _T.t(ind-1, ind-1); shiftVec.r(2) = _T.t(ind, ind-1) * _T.t(ind-1, ind); if (iter == 10) { shift += shiftVec.t(0); for (int i = 0; i <= ind; ++i) _T.r(i,i) -= shiftVec.t(0); T s = math::nd4j_abs(_T.t(ind, ind-1)) + math::nd4j_abs(_T.t(ind-1, ind-2)); shiftVec.r(0) = T(0.75) * s; shiftVec.r(1) = T(0.75) * s; shiftVec.r(2) = T(-0.4375) * s*s; } if (iter == 30) { T s = (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); s = s*s + shiftVec.t(2); if (s > T(0)) { s = math::nd4j_sqrt(s); if (shiftVec.t(1) < shiftVec.t(0)) s = -s; s = s + (shiftVec.t(1) - shiftVec.t(0)) / T(2.0); s = shiftVec.t(0) - shiftVec.t(2) / s; shift += s; for (int i = 0; i <= ind; ++i) _T.r(i,i) -= s; shiftVec = T(0.964); } } } ////////////////////////////////////////////////////////////////////////// template void Schur::initFrancisQR(const int ind1, const int ind2, const NDArray& shiftVec, int& ind3, NDArray& householderVec) { // shiftVec has length = 3 for (ind3 = ind2-2; ind3 >= ind1; --ind3) { const T mm = _T.t(ind3, ind3); const T r = shiftVec.t(0) - mm; const T s = shiftVec.t(1) - mm; householderVec.r(0) = (r * s - shiftVec.t(2)) / _T.t(ind3+1, ind3) + _T.t(ind3, ind3+1); householderVec.r(1) = _T.t(ind3+1, ind3+1) - mm - r - s; householderVec.r(2) = _T.t(ind3+2, ind3+1); if (ind3 == ind1) break; const T lhs = _T.t(ind3,ind3-1) * (math::nd4j_abs(householderVec.t(1)) + math::nd4j_abs(householderVec.t(2))); const T rhs = householderVec.t(0) * (math::nd4j_abs(_T.t(ind3-1, ind3-1)) + math::nd4j_abs(mm) + math::nd4j_abs(_T.t(ind3+1, ind3+1))); if(math::nd4j_abs(lhs) < DataTypeUtils::eps() * rhs) break; } } ////////////////////////////////////////////////////////////////////////// template void Schur::doFrancisQR(const int ind1, const int ind2, const int ind3, const NDArray& householderVec) { if(!(ind2 >= ind1)) throw std::runtime_error("ops::helpers::Schur:doFrancisQR: wrong input indexes, condition ind2 >= ind1 must be true !"); if(!(ind2 <= ind3-2)) throw std::runtime_error("ops::helpers::Schur:doFrancisQR: wrong input indexes, condition iind2 <= ind3-2 must be true !"); const int numCols = _T.sizeAt(1); for (int k = ind2; k <= ind3-2; ++k) { const bool firstIter = (k == ind2); T coeff, normX; NDArray tail(_T.ordering(), {2, 1}, _T.dataType(), _T.getContext()); Householder::evalHHmatrixData(firstIter ? householderVec : _T({k,k+3, k-1,k}), tail, coeff, normX); if (normX != T(0)) { if (firstIter && k > ind1) _T.r(k, k-1) = -_T.t(k, k-1); else if (!firstIter) _T.r(k, k-1) = normX; NDArray block1 = _T({k,k+3, k,numCols}, true); Householder::mulLeft(block1, tail, coeff); NDArray block2 = _T({0,math::nd4j_min(ind3,k+3)+1, k,k+3}, true); Householder::mulRight(block2, tail, coeff); NDArray block3 = _U({0,numCols, k,k+3}, true); Householder::mulRight(block3, tail, coeff); } } T coeff, normX; NDArray tail(_T.ordering(), {1, 1}, _T.dataType(), _T.getContext()); Householder::evalHHmatrixData(_T({ind3-1,ind3+1, ind3-2,ind3-1}), tail, coeff, normX); if (normX != T(0)) { _T.r(ind3-1, ind3-2) = normX; NDArray block1 = _T({ind3-1,ind3+1, ind3-1,numCols}, true); Householder::mulLeft(block1, tail, coeff); NDArray block2 = _T({0,ind3+1, ind3-1,ind3+1}, true); Householder::mulRight(block2, tail, coeff); NDArray block3 = _U({0,numCols, ind3-1,ind3+1}, true); Householder::mulRight(block3, tail, coeff); } for (int i = ind2+2; i <= ind3; ++i) { _T.r(i, i-2) = T(0); if (i > ind2+2) _T.r(i, i-3) = T(0); } } ////////////////////////////////////////////////////////////////////////// template void Schur::calcFromHessenberg() { const int maxIters = _maxItersPerRow * _T.sizeAt(0); const int numCols = _T.sizeAt(1); int iu = numCols - 1; int iter = 0; int totalIter = 0; T shift = T(0); T norm = 0; for (int j = 0; j < numCols; ++j) norm += _T({0,math::nd4j_min(numCols,j+2), j,j+1}).reduceNumber(reduce::ASum).template t(0); if(norm != T(0)) { while (iu >= 0) { const int il = getSmallSubdiagEntry(iu); if (il == iu) { _T.r(iu,iu) = _T.t(iu,iu) + shift; if (iu > 0) _T.r(iu, iu-1) = T(0); iu--; iter = 0; } else if (il == iu-1) { splitTwoRows(iu, shift); iu -= 2; iter = 0; } else { NDArray householderVec(_T.ordering(), {3}, _T.dataType(), _T.getContext()); NDArray shiftVec (_T.ordering(), {3}, _T.dataType(), _T.getContext()); calcShift(iu, iter, shift, shiftVec); ++iter; ++totalIter; if (totalIter > maxIters) break; int im; initFrancisQR(il, iu, shiftVec, im, householderVec); doFrancisQR(il, im, iu, householderVec); } } } } template class ND4J_EXPORT Hessenberg; template class ND4J_EXPORT Hessenberg; template class ND4J_EXPORT Hessenberg; template class ND4J_EXPORT Hessenberg; template class ND4J_EXPORT Schur; template class ND4J_EXPORT Schur; template class ND4J_EXPORT Schur; template class ND4J_EXPORT Schur; } } }