From a2087cd7a3674c3d3ef74a474e417a3ea1f1e82b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 11 Jul 2009 21:14:59 +0200 Subject: [PATCH] Add an efficient rank2 update function (like the level2 blas xSYR2 routine). Note that it is already used in Tridiagonalization. --- Eigen/Core | 1 + Eigen/src/Core/Product.h | 58 +++++------ Eigen/src/Core/SelfAdjointView.h | 10 ++ Eigen/src/Core/SolveTriangular.h | 17 ++-- Eigen/src/Core/products/GeneralMatrixMatrix.h | 2 +- Eigen/src/Core/products/GeneralMatrixVector.h | 2 +- .../Core/products/SelfadjointMatrixVector.h | 2 +- .../Core/products/SelfadjointRank2Update.h | 96 +++++++++++++++++++ Eigen/src/QR/Tridiagonalization.h | 6 +- test/eigensolver_selfadjoint.cpp | 4 +- test/product_selfadjoint.cpp | 40 ++++++-- 11 files changed, 187 insertions(+), 51 deletions(-) create mode 100644 Eigen/src/Core/products/SelfadjointRank2Update.h diff --git a/Eigen/Core b/Eigen/Core index 89b18d201..18e6a6045 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -180,6 +180,7 @@ namespace Eigen { #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/SolveTriangular.h" +#include "src/Core/products/SelfadjointRank2Update.h" } // namespace Eigen diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index bd99bfee8..44e3f606e 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -76,7 +76,7 @@ struct ProductReturnType /* Helper class to analyze the factors of a Product expression. * In particular it allows to pop out operator-, scalar multiples, * and conjugate */ -template struct ei_product_factor_traits +template struct ei_blas_traits { typedef typename ei_traits::Scalar Scalar; typedef XprType ActualXprType; @@ -85,15 +85,19 @@ template struct ei_product_factor_traits NeedToConjugate = false, ActualAccess = int(ei_traits::Flags)&DirectAccessBit ? HasDirectAccess : NoDirectAccess }; + typedef typename ei_meta_if::ret DirectLinearAccessType; static inline const ActualXprType& extract(const XprType& x) { return x; } static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); } }; // pop conjugate -template struct ei_product_factor_traits, NestedXpr> > - : ei_product_factor_traits +template struct ei_blas_traits, NestedXpr> > + : ei_blas_traits { - typedef ei_product_factor_traits Base; + typedef ei_blas_traits Base; typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; @@ -106,10 +110,10 @@ template struct ei_product_factor_traits struct ei_product_factor_traits, NestedXpr> > - : ei_product_factor_traits +template struct ei_blas_traits, NestedXpr> > + : ei_blas_traits { - typedef ei_product_factor_traits Base; + typedef ei_blas_traits Base; typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } @@ -118,10 +122,10 @@ template struct ei_product_factor_traits struct ei_product_factor_traits, NestedXpr> > - : ei_product_factor_traits +template struct ei_blas_traits, NestedXpr> > + : ei_blas_traits { - typedef ei_product_factor_traits Base; + typedef ei_blas_traits Base; typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } @@ -130,11 +134,11 @@ template struct ei_product_factor_traits struct ei_product_factor_traits > - : ei_product_factor_traits +template struct ei_blas_traits > + : ei_blas_traits { typedef typename NestedXpr::Scalar Scalar; - typedef ei_product_factor_traits Base; + typedef ei_blas_traits Base; typedef NestByValue XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(static_cast(x)); } @@ -148,8 +152,8 @@ template struct ei_product_factor_traits struct ei_product_mode { - typedef typename ei_product_factor_traits::ActualXprType ActualLhs; - typedef typename ei_product_factor_traits::ActualXprType ActualRhs; + typedef typename ei_blas_traits::ActualXprType ActualLhs; + typedef typename ei_blas_traits::ActualXprType ActualRhs; enum{ value = Lhs::MaxColsAtCompileTime == Dynamic @@ -600,10 +604,10 @@ static void ei_cache_friendly_product_rowmajor_times_vector( template::RowsAtCompileTime, int LhsOrder = int(ei_traits::LhsFlags)&RowMajorBit ? RowMajor : ColMajor, - int LhsHasDirectAccess = ei_product_factor_traits::_LhsNested>::ActualAccess, + int LhsHasDirectAccess = ei_blas_traits::_LhsNested>::ActualAccess, int RhsCols = ei_traits::ColsAtCompileTime, int RhsOrder = int(ei_traits::RhsFlags)&RowMajorBit ? RowMajor : ColMajor, - int RhsHasDirectAccess = ei_product_factor_traits::_RhsNested>::ActualAccess> + int RhsHasDirectAccess = ei_blas_traits::_RhsNested>::ActualAccess> struct ei_cache_friendly_product_selector { template @@ -633,8 +637,8 @@ template struct ei_cache_friendly_product_selector { typedef typename ProductType::Scalar Scalar; - typedef ei_product_factor_traits::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits::_RhsNested> RhsProductTraits; + typedef ei_blas_traits::_LhsNested> LhsProductTraits; + typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -694,8 +698,8 @@ template struct ei_cache_friendly_product_selector { typedef typename ProductType::Scalar Scalar; - typedef ei_product_factor_traits::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits::_RhsNested> RhsProductTraits; + typedef ei_blas_traits::_LhsNested> LhsProductTraits; + typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -740,8 +744,8 @@ struct ei_cache_friendly_product_selector::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits::_RhsNested> RhsProductTraits; + typedef ei_blas_traits::_LhsNested> LhsProductTraits; + typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -783,8 +787,8 @@ struct ei_cache_friendly_product_selector::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits::_RhsNested> RhsProductTraits; + typedef ei_blas_traits::_LhsNested> LhsProductTraits; + typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -903,8 +907,8 @@ template template inline void Product::_cacheFriendlyEvalAndAdd(DestDerived& res, Scalar alpha) const { - typedef ei_product_factor_traits<_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits<_RhsNested> RhsProductTraits; + typedef ei_blas_traits<_LhsNested> LhsProductTraits; + typedef ei_blas_traits<_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 2f66cfa45..28f44cbbc 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -106,6 +106,16 @@ template class SelfAdjointView return ei_selfadjoint_vector_product_returntype(*this, rhs.derived()); } + /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this: + * \f$ this = this + \alpha ( u v^* + v u^*) \f$ + * + * The vectors \a u and \c v \b must be column vectors, however they can be + * a adjoint expression without any overhead. Only the meaningful triangular + * part of the matrix is updated, the rest is left unchanged. + */ + template + void rank2update(const MatrixBase& u, const MatrixBase& v, Scalar alpha = Scalar(1)); + /////////// Cholesky module /////////// const LLT llt() const; diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 3a65a8b27..200b4a325 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -33,12 +33,12 @@ template struct ei_triangular_solver_selector; -// forward substitution, row-major +// forward and backward substitution, row-major template struct ei_triangular_solver_selector { typedef typename Rhs::Scalar Scalar; - typedef ei_product_factor_traits LhsProductTraits; + typedef ei_blas_traits LhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; enum { IsLowerTriangular = ((Mode&LowerTriangularBit)==LowerTriangularBit) @@ -60,6 +60,9 @@ struct ei_triangular_solver_selector int r = IsLowerTriangular ? pi : size - pi; // remaining size if (r > 0) { + // let's directly call the low level product function because: + // 1 - it is faster to compile + // 2 - it is slighlty faster at runtime int startRow = IsLowerTriangular ? pi : pi-actualPanelWidth; int startCol = IsLowerTriangular ? 0 : pi; Block target(other,startRow,c,actualPanelWidth,1); @@ -86,17 +89,13 @@ struct ei_triangular_solver_selector } }; -// Implements the following configurations: -// - inv(LowerTriangular, ColMajor) * Column vectors -// - inv(LowerTriangular,UnitDiag,ColMajor) * Column vectors -// - inv(UpperTriangular, ColMajor) * Column vectors -// - inv(UpperTriangular,UnitDiag,ColMajor) * Column vectors +// forward and backward substitution, column-major template struct ei_triangular_solver_selector { typedef typename Rhs::Scalar Scalar; typedef typename ei_packet_traits::type Packet; - typedef ei_product_factor_traits LhsProductTraits; + typedef ei_blas_traits LhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; enum { PacketSize = ei_packet_traits::size, @@ -136,7 +135,7 @@ struct ei_triangular_solver_selector int r = IsLowerTriangular ? size - endBlock : startBlock; // remaining size if (r > 0) { - // let's directly call this function because: + // let's directly call the low level product function because: // 1 - it is faster to compile // 2 - it is slighlty faster at runtime ei_cache_friendly_product_colmajor_times_vector( diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 0036fe390..fe3e877e1 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index ccaafb8bd..57875035a 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index fbdeb148f..aa3187a07 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h new file mode 100644 index 000000000..edb57ecd5 --- /dev/null +++ b/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -0,0 +1,96 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_SELFADJOINTRANK2UPTADE_H +#define EIGEN_SELFADJOINTRANK2UPTADE_H + +/* Optimized selfadjoint matrix += alpha * uv' + vu' + * It corresponds to the Level2 syr2 BLAS routine + */ + +template +struct ei_selfadjoint_rank2_update_selector; + +template +struct ei_selfadjoint_rank2_update_selector +{ + static void run(Scalar* mat, int stride, const UType& u, const VType& v, Scalar alpha) + { + const int size = u.size(); +// std::cerr << "lower \n" << u.transpose() << "\n" << v.transpose() << "\n\n"; + for (int i=0; i >(mat+stride*i+i, size-i) += + (alpha * ei_conj(u.coeff(i))) * v.end(size-i) + + (alpha * ei_conj(v.coeff(i))) * u.end(size-i); + } + } +}; + +template +struct ei_selfadjoint_rank2_update_selector +{ + static void run(Scalar* mat, int stride, const UType& u, const VType& v, Scalar alpha) + { + const int size = u.size(); + for (int i=0; i >(mat+stride*i, i+1) += + (alpha * ei_conj(u.coeff(i))) * v.start(i+1) + + (alpha * ei_conj(v.coeff(i))) * u.start(i+1); + } +}; + +template struct ei_conj_expr_if + : ei_meta_if::Scalar>,T> > {}; + + +template +template +void SelfAdjointView +::rank2update(const MatrixBase& u, const MatrixBase& v, Scalar alpha) +{ + typedef ei_blas_traits UBlasTraits; + typedef typename UBlasTraits::DirectLinearAccessType ActualUType; + typedef typename ei_cleantype::type _ActualUType; + const ActualUType actualU = UBlasTraits::extract(u.derived()); + + typedef ei_blas_traits VBlasTraits; + typedef typename VBlasTraits::DirectLinearAccessType ActualVType; + typedef typename ei_cleantype::type _ActualVType; + const ActualVType actualV = VBlasTraits::extract(v.derived()); + + Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()) + * VBlasTraits::extractScalarFactor(v.derived()); + + enum { IsRowMajor = (ei_traits::Flags&RowMajorBit)?1:0 }; + ei_selfadjoint_rank2_update_selector::ret, + typename ei_conj_expr_if::ret, + (IsRowMajor ? (UpLo==UpperTriangular ? LowerTriangular : UpperTriangular) : UpLo)> + ::run(const_cast(_expression().data()),_expression().stride(),actualU,actualV,actualAlpha); +} + +#endif // EIGEN_SELFADJOINTRANK2UPTADE_H diff --git a/Eigen/src/QR/Tridiagonalization.h b/Eigen/src/QR/Tridiagonalization.h index bd8ff4fe3..4808b69ce 100644 --- a/Eigen/src/QR/Tridiagonalization.h +++ b/Eigen/src/QR/Tridiagonalization.h @@ -236,10 +236,8 @@ void Tridiagonalization::_compute(MatrixType& matA, CoeffVectorType& + (h*ei_conj(h)*Scalar(-0.5)*(matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))) * matA.col(i).end(n-i-1); - // symmetric rank-2 update - for (int j1=i+1; j1() + .rank2update(matA.col(i).end(n-i-1), hCoeffs.end(n-i-1), -1); // note: at that point matA(i+1,i+1) is the (i+1)-th element of the final diagonal // note: the sequence of the beta values leads to the subdiagonal entries diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index c93953714..6b5092775 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -119,8 +119,8 @@ void test_eigensolver_selfadjoint() // very important to test a 3x3 matrix since we provide a special path for it CALL_SUBTEST( selfadjointeigensolver(Matrix3f()) ); CALL_SUBTEST( selfadjointeigensolver(Matrix4d()) ); - CALL_SUBTEST( selfadjointeigensolver(MatrixXf(7,7)) ); - CALL_SUBTEST( selfadjointeigensolver(MatrixXcd(5,5)) ); + CALL_SUBTEST( selfadjointeigensolver(MatrixXf(4,4)) ); + CALL_SUBTEST( selfadjointeigensolver(MatrixXcd(7,7)) ); CALL_SUBTEST( selfadjointeigensolver(MatrixXd(19,19)) ); // some trivial but implementation-wise tricky cases diff --git a/test/product_selfadjoint.cpp b/test/product_selfadjoint.cpp index b26b7223b..297bab1a9 100644 --- a/test/product_selfadjoint.cpp +++ b/test/product_selfadjoint.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -29,20 +29,29 @@ template void product_selfadjoint(const MatrixType& m) typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef Matrix VectorType; + typedef Matrix RowVectorType; int rows = m.rows(); int cols = m.cols(); MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols); + m2 = MatrixType::Random(rows, cols), + m3; VectorType v1 = VectorType::Random(rows), v2 = VectorType::Random(rows); + + RowVectorType r1 = RowVectorType::Random(rows), + r2 = RowVectorType::Random(rows); + + Scalar s1 = ei_random(), + s2 = ei_random(), + s3 = ei_random(); m1 = m1.adjoint()*m1; // lower m2.setZero(); - m2.template part() = m1; + m2.template triangularView() = m1; ei_product_selfadjoint_vector (cols,m2.data(),cols, v1.data(), v2.data()); VERIFY_IS_APPROX(v2, m1 * v1); @@ -50,11 +59,30 @@ template void product_selfadjoint(const MatrixType& m) // upper m2.setZero(); - m2.template part() = m1; + m2.template triangularView() = m1; ei_product_selfadjoint_vector(cols,m2.data(),cols, v1.data(), v2.data()); VERIFY_IS_APPROX(v2, m1 * v1); VERIFY_IS_APPROX((m2.template selfadjointView() * v1).eval(), m1 * v1); + // rank2 update + m2 = m1.template triangularView(); + m2.template selfadjointView().rank2update(v1,v2); + VERIFY_IS_APPROX(m2, (m1 + v1 * v2.adjoint()+ v2 * v1.adjoint()).template triangularView().toDense()); + + m2 = m1.template triangularView(); + m2.template selfadjointView().rank2update(-v1,s2*v2,s3); + VERIFY_IS_APPROX(m2, (m1 + (-s2*s3) * (v1 * v2.adjoint()+ v2 * v1.adjoint())).template triangularView().toDense()); + + m2 = m1.template triangularView(); + m2.template selfadjointView().rank2update(-r1.adjoint(),r2.adjoint()*s3,s1); + VERIFY_IS_APPROX(m2, (m1 + (-s3*s1) * (r1.adjoint() * r2 + r2.adjoint() * r1)).template triangularView().toDense()); + + m2 = m1.template triangularView(); + m2.block(1,1,rows-1,cols-1).template selfadjointView().rank2update(v1.end(rows-1),v2.start(cols-1)); + m3 = m1; + m3.block(1,1,rows-1,cols-1) += v1.end(rows-1) * v2.start(cols-1).adjoint()+ v2.start(cols-1) * v1.end(rows-1).adjoint(); + VERIFY_IS_APPROX(m2, m3.template triangularView().toDense()); + } void test_product_selfadjoint() @@ -65,8 +93,8 @@ void test_product_selfadjoint() CALL_SUBTEST( product_selfadjoint(Matrix3d()) ); CALL_SUBTEST( product_selfadjoint(MatrixXcf(4, 4)) ); CALL_SUBTEST( product_selfadjoint(MatrixXcd(21,21)) ); - CALL_SUBTEST( product_selfadjoint(MatrixXd(17,17)) ); - CALL_SUBTEST( product_selfadjoint(Matrix(18,18)) ); + CALL_SUBTEST( product_selfadjoint(MatrixXd(4,4)) ); + CALL_SUBTEST( product_selfadjoint(Matrix(17,17)) ); CALL_SUBTEST( product_selfadjoint(Matrix,Dynamic,Dynamic,RowMajor>(19, 19)) ); } }