--- a/Eigen/src/Eigenvalues/Tridiagonalization.h +++ a/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -21,17 +21,21 @@ struct traits { typedef typename MatrixType::PlainObject ReturnType; // FIXME shall it be a BandMatrix? enum { Flags = 0 }; }; template EIGEN_DEVICE_FUNC -void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs); +void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs, typename MatrixType::Scalar* workspace); + +EIGEN_DEVICE_FUNC +Index tridiagonalization_inplace_blocked_workspace_size(Index N, Index r); + } /** \eigenvalues_module \ingroup Eigenvalues_Module * * * \class Tridiagonalization * * \brief Tridiagonal decomposition of a selfadjoint matrix @@ -72,24 +76,27 @@ template class Tri typedef typename NumTraits::Real RealScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 enum { Size = MatrixType::RowsAtCompileTime, SizeMinusOne = Size == Dynamic ? Dynamic : (Size > 1 ? Size - 1 : 1), Options = MatrixType::Options, MaxSize = MatrixType::MaxRowsAtCompileTime, - MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? MaxSize - 1 : 1) + MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? MaxSize - 1 : 1), + BlockThreshold = 300, + BlockSize = 60 }; typedef Matrix CoeffVectorType; typedef typename internal::plain_col_type::type DiagonalType; typedef Matrix SubDiagonalType; typedef typename internal::remove_all::type MatrixTypeRealView; typedef internal::TridiagonalizationMatrixTReturnType MatrixTReturnType; + typedef Matrix WorkspaceType; typedef typename internal::conditional::IsComplex, typename internal::add_const_on_value_type::RealReturnType>::type, const Diagonal >::type DiagonalReturnType; typedef typename internal::conditional::IsComplex, typename internal::add_const_on_value_type::RealReturnType>::type, @@ -126,19 +133,20 @@ template class Tri * * Example: \include Tridiagonalization_Tridiagonalization_MatrixType.cpp * Output: \verbinclude Tridiagonalization_Tridiagonalization_MatrixType.out */ template explicit Tridiagonalization(const EigenBase& matrix) : m_matrix(matrix.derived()), m_hCoeffs(matrix.cols() > 1 ? matrix.cols()-1 : 1), + m_workspace(matrix.cols() > BlockThreshold ? internal::tridiagonalization_inplace_blocked_workspace_size(matrix.cols(), BlockSize) : 0), m_isInitialized(false) { - internal::tridiagonalization_inplace(m_matrix, m_hCoeffs); + internal::tridiagonalization_inplace(m_matrix, m_hCoeffs, m_workspace.data()); m_isInitialized = true; } /** \brief Computes tridiagonal decomposition of given matrix. * * \param[in] matrix Selfadjoint matrix whose tridiagonal decomposition * is to be computed. * \returns Reference to \c *this @@ -154,17 +162,20 @@ template class Tri * Example: \include Tridiagonalization_compute.cpp * Output: \verbinclude Tridiagonalization_compute.out */ template Tridiagonalization& compute(const EigenBase& matrix) { m_matrix = matrix.derived(); m_hCoeffs.resize(matrix.rows()-1, 1); - internal::tridiagonalization_inplace(m_matrix, m_hCoeffs); + if(matrix.cols() > BlockThreshold){ + m_workspace.resize(internal::tridiagonalization_inplace_blocked_workspace_size(matrix.cols(), BlockSize)); + } + internal::tridiagonalization_inplace(m_matrix, m_hCoeffs, m_workspace.data()); m_isInitialized = true; return *this; } /** \brief Returns the Householder coefficients. * * \returns a const reference to the vector of Householder coefficients * @@ -295,16 +306,17 @@ template class Tri * \sa diagonal() for an example, matrixT() */ SubDiagonalReturnType subDiagonal() const; protected: MatrixType m_matrix; CoeffVectorType m_hCoeffs; + WorkspaceType m_workspace; bool m_isInitialized; }; template typename Tridiagonalization::DiagonalReturnType Tridiagonalization::diagonal() const { eigen_assert(m_isInitialized && "Tridiagonalization is not initialized."); @@ -315,17 +327,16 @@ template typename Tridiagonalization::SubDiagonalReturnType Tridiagonalization::subDiagonal() const { eigen_assert(m_isInitialized && "Tridiagonalization is not initialized."); return m_matrix.template diagonal<-1>().real(); } namespace internal { - /** \internal * Performs a tridiagonal decomposition of the selfadjoint matrix \a matA in-place. * * \param[in,out] matA On input the selfadjoint matrix. Only the \b lower triangular part is referenced. * On output, the strict upper part is left unchanged, and the lower triangular part * represents the T and Q matrices in packed format has detailed below. * \param[out] hCoeffs returned Householder coefficients (see below) * @@ -341,17 +352,17 @@ namespace internal { * \f$ v_i = [ 0, \ldots, 0, 1, matA(i+2,i), \ldots, matA(N-1,i) ]^T \f$. * * Implemented from Golub's "Matrix Computations", algorithm 8.3.1. * * \sa Tridiagonalization::packedMatrix() */ template EIGEN_DEVICE_FUNC -void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs) +void tridiagonalization_inplace_unblocked(MatrixType& matA, CoeffVectorType& hCoeffs) { using numext::conj; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; Index n = matA.rows(); eigen_assert(n==matA.cols()); eigen_assert(n==hCoeffs.size()+1 || n==1); @@ -374,16 +385,115 @@ void tridiagonalization_inplace(MatrixTy matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView() .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1)); matA.col(i).coeffRef(i+1) = beta; hCoeffs.coeffRef(i) = h; } } +/** + * Calculates required workspace size for block householder tridiagonalization + * of a matrix of size n * n, by using block size of r. + * @param N matrix dimenstion + * @param r block size + * @return minimum workspace size + */ +EIGEN_DEVICE_FUNC +Index tridiagonalization_inplace_blocked_workspace_size(Index N, Index r){ + Index first_actual_r = std::min({r, static_cast(N - 1)}); + Index V_size = (N - 1) * r; + Index partial_update_size = (N - first_actual_r) * (N - first_actual_r); + return V_size + partial_update_size; +} + +/** + * Tridiagonalize a selfadjoint matrix using block Housholder algorithm. + * A = Q * T * Q^T, where T is tridiagonal and Q is unitary. + * @param[in,out] packed On input the input matrix. On output packed form of the + * tridiagonal matrix. Elements of the resulting selfadjoint tridiagonal matrix + * T are in the diagonal and first subdiagonal, which contains subdiagonal + * entries of T. Columns bellow diagonal contain householder vectors that can be + * used to construct unitary matrix Q. + * @param[out] hCoeffs householder coefficients + * @param workspace Workspace. Minimal required size can be determined by + * calling `tridiagonalization_inplace_blocked_workspace_size()` + * @param r Block size. Affects only performance of the algorithm. Optimal value + * depends on the size of A and cache of the processor. For larger matrices or + * larger cache sizes a larger value is optimal. + */ +template +EIGEN_DEVICE_FUNC +void tridiagonalization_inplace_blocked( + MatrixType& packed, + CoeffVectorType& hCoeffs, + typename MatrixType::Scalar* workspace, const Index r) { + using Scalar = typename MatrixType::Scalar; + using Real = typename NumTraits::Real; + using MapType = typename Matrix::MapType; + using MapTypeVec = typename Matrix::MapType; + Index N = packed.rows(); + for (Index k = 0; k < N - 1; k += r) { + const Index actual_r = std::min({r, static_cast(N - k - 1)}); + MapType V(workspace, N - k - 1, actual_r); + for (Index j = 0; j < actual_r; j++) { + if (j != 0) { + Index householder_whole_size = N - k - j; + Scalar temp = packed(k + j, k + j - 1); + packed(k + j, k + j - 1) = Scalar(1); + packed.col(k + j).tail(householder_whole_size) -= packed.block(k + j, k, householder_whole_size, j) * V.block(j - 1, 0, 1, j).adjoint(); + packed.col(k + j).tail(householder_whole_size) -= V.block(j - 1, 0, householder_whole_size, j) * packed.block(k + j, k, 1, j).adjoint(); + packed(k + j, k + j - 1) = temp; + } + typename Matrix::ColXpr::SegmentReturnType householder = packed.col(k + j).tail(N- k - j - 1); + Scalar tau; + Real beta; + householder.makeHouseholderInPlace(tau, beta); + householder[0] = Scalar(1); + + typename MapType::ColXpr::SegmentReturnType v = V.col(j).tail(householder.size()); + v.noalias() = packed.bottomRightCorner(N - k - j - 1,N - k - j - 1) + .template selfadjointView() * householder * numext::conj(tau); + MapTypeVec tmp(workspace + V.size(), j); + + //Reminder of packed is not transformed by current block yet - v needs some fixes + tmp.noalias() = V.bottomLeftCorner(householder.size(), j).adjoint() * householder * numext::conj(tau); + v.noalias() -= packed.block(k + j + 1, k, householder.size(), j) * tmp; + tmp.noalias() = packed.block(k + j + 1, k, householder.size(), j).adjoint() * householder * numext::conj(tau); + v.noalias() -= V.bottomLeftCorner(householder.size(), j) * tmp; + + const Scalar cnst = (v.adjoint() * householder)[0]; + v -= Real(0.5) * numext::conj(tau) * cnst * householder; + + //store householder transformation scaling and subdiagonal of T + hCoeffs(k + j) = tau; + packed(k + j + 1, k + j) = beta; + } + //update reminder of packed with the last block + MapType partial_update(workspace + V.size(), V.rows() - actual_r + 1,V.rows() - actual_r + 1); + Scalar tmp = packed(k + actual_r, k+actual_r-1); + packed(k + actual_r, k+actual_r-1) = Scalar(1); + partial_update.noalias() = packed.block(k + actual_r, k, N - k - actual_r, actual_r) * V.bottomRows(V.rows() - actual_r + 1).adjoint(); + packed(k + actual_r, k+actual_r-1) = tmp; + packed.block(k + actual_r, k + actual_r, N - k - actual_r, N - k - actual_r).template triangularView() + -= partial_update + partial_update.adjoint(); + } +} + +template +EIGEN_DEVICE_FUNC +void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs, typename MatrixType::Scalar* workspace) +{ + if(matA.rows() > Tridiagonalization::BlockThreshold) + tridiagonalization_inplace_blocked(matA, hCoeffs, workspace, Tridiagonalization::BlockSize); + else { + tridiagonalization_inplace_unblocked(matA, hCoeffs); + } +} + // forward declaration, implementation at the end of this file template::IsComplex> struct tridiagonalization_inplace_selector; /** \brief Performs a full tridiagonalization in place * @@ -436,22 +546,29 @@ void tridiagonalization_inplace(MatrixTy /** \internal * General full tridiagonalization */ template struct tridiagonalization_inplace_selector { typedef typename Tridiagonalization::CoeffVectorType CoeffVectorType; typedef typename Tridiagonalization::HouseholderSequenceType HouseholderSequenceType; + typedef typename Tridiagonalization::WorkspaceType WorkspaceType; template static EIGEN_DEVICE_FUNC void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ) { CoeffVectorType hCoeffs(mat.cols()-1); - tridiagonalization_inplace(mat,hCoeffs); + if(mat.cols()>Tridiagonalization::BlockThreshold){ + WorkspaceType workspace(tridiagonalization_inplace_blocked_workspace_size(mat.cols(), Tridiagonalization::BlockSize)); + tridiagonalization_inplace_blocked(mat, hCoeffs, workspace.data()); + } + else { + tridiagonalization_inplace_unblocked(mat, hCoeffs); + } diag = mat.diagonal().real(); subdiag = mat.template diagonal<-1>().real(); if(extractQ) mat = HouseholderSequenceType(mat, hCoeffs.conjugate()) .setLength(mat.rows() - 1) .setShift(1); } };