10 changed files with 208 additions and 151 deletions
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@ -86,7 +86,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const

 //---------- implementation of L2 norm and related functions ----------

-/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
+/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
  * In both cases, it consists in the sum of the square of all the matrix entries.
  * For vectors, this is also equals to the dot product of \c *this with itself.
  *
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@ -572,9 +572,7 @@ struct rint_retval
 * Implementation of arg                                                     *
 ****************************************************************************/

-// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
-// This seems to be fixed in VS 2019.
-#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
+#if EIGEN_HAS_CXX11_MATH
 // std::arg is only defined for types of std::complex, or integer types or float/double/long double
 template<typename Scalar,
          bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@ -16,8 +16,8 @@
 //------------------------------------------------------------------------------------------

 #define EIGEN_WORLD_VERSION 3
-#define EIGEN_MAJOR_VERSION 4
-#define EIGEN_MINOR_VERSION 0
+#define EIGEN_MAJOR_VERSION 3
+#define EIGEN_MINOR_VERSION 91

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
--- a/doc/examples/matrixfree.cpp
+++ b/doc/examples/matrixfree.cpp
@ -1,145 +0,0 @@
-#include <iostream>
-#include <Eigen/Core>
-#include <Eigen/Dense>
-#include <Eigen/IterativeLinearSolvers>
-#include <unsupported/Eigen/IterativeSolvers>
-
-class MatrixReplacement;
-using Eigen::SparseMatrix;
-
-namespace Eigen {
-namespace internal {
-  // MatrixReplacement looks-like a SparseMatrix, so let's inherits its traits:
-  template<>
-  struct traits<MatrixReplacement> :  public Eigen::internal::traits<Eigen::SparseMatrix<double> >
-  {};
-}
-}
-
-// Example of a matrix-free wrapper from a user type to Eigen's compatible type
-// For the sake of simplicity, this example simply wrap a Eigen::SparseMatrix.
-class MatrixReplacement : public Eigen::EigenBase<MatrixReplacement> {
-public:
-  // Required typedefs, constants, and method:
-  typedef double Scalar;
-  typedef double RealScalar;
-  typedef int StorageIndex;
-  enum {
-    ColsAtCompileTime = Eigen::Dynamic,
-    MaxColsAtCompileTime = Eigen::Dynamic,
-    IsRowMajor = false
-  };
-
-  Index rows() const { return _n; }
-  Index cols() const { return _n; }
-
-  template<typename Rhs>
-  Eigen::Product<MatrixReplacement,Rhs,Eigen::AliasFreeProduct> operator*(const Eigen::MatrixBase<Rhs>& x) const {
-    return Eigen::Product<MatrixReplacement,Rhs,Eigen::AliasFreeProduct>(*this, x.derived());
-  }
-
-  // Custom API:
-  MatrixReplacement(int n){ _n = n; }
-
-private:
-  int _n;
-};
-
-
-// Implementation of MatrixReplacement * Eigen::DenseVector though a specialization of internal::generic_product_impl:
-namespace Eigen {
-namespace internal {
-
-  template<typename Rhs>
-  struct generic_product_impl<MatrixReplacement, Rhs, SparseShape, DenseShape, GemvProduct> // GEMV stands for matrix-vector
-  : generic_product_impl_base<MatrixReplacement,Rhs,generic_product_impl<MatrixReplacement,Rhs> >
-  {
-    typedef typename Product<MatrixReplacement,Rhs>::Scalar Scalar;
-
-    template<typename Dest>
-    static void scaleAndAddTo(Dest& dst, const MatrixReplacement& lhs, const Rhs& rhs, const Scalar& alpha)
-    {
-      // This method should implement "dst += alpha * lhs * rhs" inplace,
-      // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it.
-      assert(alpha==Scalar(1) && "scaling is not implemented");
-      EIGEN_ONLY_USED_FOR_DEBUG(alpha);
-
-      // LLS:
-      // the matrix comes from 1D Poisson Eq. -u_{xx} = f
-      // FD scheme is: 2 U_{i} - U_{i-1}-U_{i+1} = f_i, i=0,...,n-1
-      // BC condition: U_{-1}=0, U_{n}=0
-      int n = lhs.cols();
-      Scalar *dst_ptr = dst.data();
-      const Scalar *rhs_ptr = rhs.data();
-
-      for(int i=0;i<n;i++) {
-        Scalar v=rhs_ptr[i];
-        Scalar vp,vm;
-        if(i==n-1) {
-          vp = 0.0;
-        } else {
-          vp = rhs_ptr[i+1];
-        }
-        if(i==0) {
-          vm = 0.0;
-        } else {
-          vm = rhs_ptr[i-1];
-        }
-        dst_ptr[i]=2.0*v-vp-vm;
-      }
-    }
-  };
-
-}
-}
-
-int main()
-{
-  int n = 4;
-
-  MatrixReplacement A(n);
-
-  Eigen::VectorXd b(n), x;
-  b.setOnes();
-
-  // Solve Ax = b using various iterative solver with matrix-free version:
-  {
-    Eigen::ConjugateGradient<MatrixReplacement, Eigen::Lower|Eigen::Upper, Eigen::IdentityPreconditioner> cg;
-    cg.compute(A);
-    x = cg.solve(b);
-    std::cout << "CG:       #iterations: " << cg.iterations() << ", estimated error: " << cg.error() << std::endl;
-    for(int i=0;i<n;i++) std::cout << "x["<<i<<"] = "<<x[i]<<"\n";
-  }
-
-  {
-    Eigen::BiCGSTAB<MatrixReplacement, Eigen::IdentityPreconditioner> bicg;
-    bicg.compute(A);
-    x = bicg.solve(b);
-    std::cout << "BiCGSTAB: #iterations: " << bicg.iterations() << ", estimated error: " << bicg.error() << std::endl;
-    for(int i=0;i<n;i++) std::cout << "x["<<i<<"] = "<<x[i]<<"\n";
-  }
-
-  {
-    Eigen::GMRES<MatrixReplacement, Eigen::IdentityPreconditioner> gmres;
-    gmres.compute(A);
-    x = gmres.solve(b);
-    std::cout << "GMRES:    #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl;
-    for(int i=0;i<n;i++) std::cout << "x["<<i<<"] = "<<x[i]<<"\n";
-  }
-
-  {
-    Eigen::DGMRES<MatrixReplacement, Eigen::IdentityPreconditioner> gmres;
-    gmres.compute(A);
-    x = gmres.solve(b);
-    std::cout << "DGMRES:   #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl;
-    for(int i=0;i<n;i++) std::cout << "x["<<i<<"] = "<<x[i]<<"\n";
-  }
-
-  {
-    Eigen::MINRES<MatrixReplacement, Eigen::Lower|Eigen::Upper, Eigen::IdentityPreconditioner> minres;
-    minres.compute(A);
-    x = minres.solve(b);
-    std::cout << "MINRES:   #iterations: " << minres.iterations() << ", estimated error: " << minres.error() << std::endl;
-    for(int i=0;i<n;i++) std::cout << "x["<<i<<"] = "<<x[i]<<"\n";
-  }
-}
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -164,6 +164,7 @@ ei_add_test(nullary)
 ei_add_test(mixingtypes)
 ei_add_test(io)
 ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
+ei_add_test(unalignedassert)
 ei_add_test(vectorization_logic)
 ei_add_test(basicstuff)
 ei_add_test(constructor)
--- a/test/geo_hyperplane.cpp
+++ b/test/geo_hyperplane.cpp
@ -172,6 +172,11 @@ template<typename Scalar> void hyperplane_alignment()

  VERIFY_IS_APPROX(p1->coeffs(), p2->coeffs());
  VERIFY_IS_APPROX(p1->coeffs(), p3->coeffs());
+  
+  #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES > 0
+  if(internal::packet_traits<Scalar>::Vectorizable && internal::packet_traits<Scalar>::size<=4)
+    VERIFY_RAISES_ASSERT((::new(reinterpret_cast<void*>(array3u)) Plane3a));
+  #endif
 }


--- a/test/geo_parametrizedline.cpp
+++ b/test/geo_parametrizedline.cpp
@ -110,6 +110,11 @@ template<typename Scalar> void parametrizedline_alignment()
  VERIFY_IS_APPROX(p1->origin(), p3->origin());
  VERIFY_IS_APPROX(p1->direction(), p2->direction());
  VERIFY_IS_APPROX(p1->direction(), p3->direction());
+  
+  #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+  if(internal::packet_traits<Scalar>::Vectorizable && internal::packet_traits<Scalar>::size<=4)
+    VERIFY_RAISES_ASSERT((::new(reinterpret_cast<void*>(array3u)) Line4a));
+  #endif
 }

 EIGEN_DECLARE_TEST(geo_parametrizedline)
--- a/test/geo_quaternion.cpp
+++ b/test/geo_quaternion.cpp
@ -218,6 +218,10 @@ template<typename Scalar> void mapQuaternion(void){
  VERIFY_IS_APPROX(q1.coeffs(), q2.coeffs());
  VERIFY_IS_APPROX(q1.coeffs(), q3.coeffs());
  VERIFY_IS_APPROX(q4.coeffs(), q3.coeffs());
+  #ifdef EIGEN_VECTORIZE
+  if(internal::packet_traits<Scalar>::Vectorizable)
+    VERIFY_RAISES_ASSERT((MQuaternionA(array3unaligned)));
+  #endif
    
  VERIFY_IS_APPROX(mq1 * (mq1.inverse() * v1), v1);
  VERIFY_IS_APPROX(mq1 * (mq1.conjugate() * v1), v1);
@ -277,6 +281,10 @@ template<typename Scalar> void quaternionAlignment(void){

  VERIFY_IS_APPROX(q1->coeffs(), q2->coeffs());
  VERIFY_IS_APPROX(q1->coeffs(), q3->coeffs());
+  #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+  if(internal::packet_traits<Scalar>::Vectorizable && internal::packet_traits<Scalar>::size<=4)
+    VERIFY_RAISES_ASSERT((::new(reinterpret_cast<void*>(arrayunaligned)) QuaternionA));
+  #endif
 }

 template<typename PlainObjectType> void check_const_correctness(const PlainObjectType&)
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@ -582,6 +582,11 @@ template<typename Scalar> void transform_alignment()
  VERIFY_IS_APPROX(p1->matrix(), p3->matrix());
  
  VERIFY_IS_APPROX( (*p1) * (*p1), (*p2)*(*p3));
+  
+  #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+  if(internal::packet_traits<Scalar>::Vectorizable)
+    VERIFY_RAISES_ASSERT((::new(reinterpret_cast<void*>(array3u)) Projective3a));
+  #endif
 }

 template<typename Scalar, int Dim, int Options> void transform_products()
--- a/test/unalignedassert.cpp
+++ b/test/unalignedassert.cpp
@ -0,0 +1,180 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#if defined(EIGEN_TEST_PART_1)
+  // default
+#elif defined(EIGEN_TEST_PART_2)
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 16
+  #define EIGEN_MAX_ALIGN_BYTES 16
+#elif defined(EIGEN_TEST_PART_3)
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 32
+  #define EIGEN_MAX_ALIGN_BYTES 32
+#elif defined(EIGEN_TEST_PART_4)
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 64
+  #define EIGEN_MAX_ALIGN_BYTES 64
+#endif
+
+#include "main.h"
+
+typedef Matrix<float,  6,1> Vector6f;
+typedef Matrix<float,  8,1> Vector8f;
+typedef Matrix<float, 12,1> Vector12f;
+
+typedef Matrix<double, 5,1> Vector5d;
+typedef Matrix<double, 6,1> Vector6d;
+typedef Matrix<double, 7,1> Vector7d;
+typedef Matrix<double, 8,1> Vector8d;
+typedef Matrix<double, 9,1> Vector9d;
+typedef Matrix<double,10,1> Vector10d;
+typedef Matrix<double,12,1> Vector12d;
+
+struct TestNew1
+{
+  MatrixXd m; // good: m will allocate its own array, taking care of alignment.
+  TestNew1() : m(20,20) {}
+};
+
+struct TestNew2
+{
+  Matrix3d m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be 16-byte aligned,
+              // 8-byte alignment is good enough here, which we'll get automatically
+};
+
+struct TestNew3
+{
+  Vector2f m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be 16-byte aligned
+};
+
+struct TestNew4
+{
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+  Vector2d m;
+  float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects
+};
+
+struct TestNew5
+{
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+  float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work
+  Matrix4f m;
+};
+
+struct TestNew6
+{
+  Matrix<float,2,2,DontAlign> m; // good: no alignment requested
+  float f;
+};
+
+template<bool Align> struct Depends
+{
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(Align)
+  Vector2d m;
+  float f;
+};
+
+template<typename T>
+void check_unalignedassert_good()
+{
+  T *x, *y;
+  x = new T;
+  delete x;
+  y = new T[2];
+  delete[] y;
+}
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+template<typename T>
+void construct_at_boundary(int boundary)
+{
+  char buf[sizeof(T)+256];
+  size_t _buf = reinterpret_cast<internal::UIntPtr>(buf);
+  _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned
+  _buf += boundary; // make exact boundary-aligned
+  T *x = ::new(reinterpret_cast<void*>(_buf)) T;
+  x[0].setZero(); // just in order to silence warnings
+  x->~T();
+}
+#endif
+
+void unalignedassert()
+{
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+  construct_at_boundary<Vector2f>(4);
+  construct_at_boundary<Vector3f>(4);
+  construct_at_boundary<Vector4f>(16);
+  construct_at_boundary<Vector6f>(4);
+  construct_at_boundary<Vector8f>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Vector12f>(16);
+  construct_at_boundary<Matrix2f>(16);
+  construct_at_boundary<Matrix3f>(4);
+  construct_at_boundary<Matrix4f>(EIGEN_MAX_ALIGN_BYTES);
+
+  construct_at_boundary<Vector2d>(16);
+  construct_at_boundary<Vector3d>(4);
+  construct_at_boundary<Vector4d>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Vector5d>(4);
+  construct_at_boundary<Vector6d>(16);
+  construct_at_boundary<Vector7d>(4);
+  construct_at_boundary<Vector8d>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Vector9d>(4);
+  construct_at_boundary<Vector10d>(16);
+  construct_at_boundary<Vector12d>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Matrix2d>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Matrix3d>(4);
+  construct_at_boundary<Matrix4d>(EIGEN_MAX_ALIGN_BYTES);
+
+  construct_at_boundary<Vector2cf>(16);
+  construct_at_boundary<Vector3cf>(4);
+  construct_at_boundary<Vector2cd>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Vector3cd>(16);
+#endif
+
+  check_unalignedassert_good<TestNew1>();
+  check_unalignedassert_good<TestNew2>();
+  check_unalignedassert_good<TestNew3>();
+
+  check_unalignedassert_good<TestNew4>();
+  check_unalignedassert_good<TestNew5>();
+  check_unalignedassert_good<TestNew6>();
+  check_unalignedassert_good<Depends<true> >();
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+  if(EIGEN_MAX_ALIGN_BYTES>=16)
+  {
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12f>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector6d>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8d>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector10d>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12d>(8));
+    // Complexes are disabled because the compiler might aggressively vectorize
+    // the initialization of complex coeffs to 0 before we can check for alignedness
+    //VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
+  }
+  for(int b=8; b<EIGEN_MAX_ALIGN_BYTES; b+=8)
+  {
+    if(b<32)  VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
+    if(b<64)  VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));
+    if(b<32)  VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b));
+    if(b<32)  VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b));
+    if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b));
+    //if(b<32)  VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cd>(b));
+  }
+#endif
+}
+
+EIGEN_DECLARE_TEST(unalignedassert)
+{
+  CALL_SUBTEST(unalignedassert());
+}