From 502bf4a81dfd13630702e253fc265849d0e00ae6 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 6 Apr 2009 13:33:42 +0000 Subject: [PATCH] * fix the binary bloat issue, Rohit's idea was the good one * a few dox fixes (alloc routines do return 0 on error) and forgot to update version number in CMakeLists --- CMakeLists.txt | 2 +- Eigen/src/Core/arch/SSE/MathFunctions.h | 140 ++++++++++++++---------- Eigen/src/Core/util/Memory.h | 4 +- 3 files changed, 88 insertions(+), 58 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ff592f646..3b430a7aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ project(Eigen) -set(EIGEN_VERSION_NUMBER "2.0.51-unstable") +set(EIGEN_VERSION_NUMBER "2.0.52-unstable") #if the svnversion program is absent, this will leave the SVN_REVISION string empty, #but won't stop CMake. diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 7df9dc659..64f9640af 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -52,38 +52,34 @@ #ifndef EIGEN_MATH_FUNCTIONS_SSE_H #define EIGEN_MATH_FUNCTIONS_SSE_H -_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); -_EIGEN_DECLARE_CONST_Packet4f(half, 0.5); -/* the smallest non denormalized float number */ -_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); -_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); - -_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); - -_EIGEN_DECLARE_CONST_Packet4i(1, 1); -_EIGEN_DECLARE_CONST_Packet4i(not1, ~1); -_EIGEN_DECLARE_CONST_Packet4i(2, 2); -_EIGEN_DECLARE_CONST_Packet4i(4, 4); -_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); - -/* natural logarithm computed for 4 simultaneous float - return NaN for x <= 0 -*/ -_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1); 
-_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4); -_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375); - template<> EIGEN_DONT_INLINE Packet4f ei_plog(Packet4f x) { + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); + + /* the smallest non denormalized float number */ + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); + + /* natural logarithm computed for 4 simultaneous float + return NaN for x <= 0 + */ + _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375); + + Packet4i emm0; Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps()); @@ -134,22 +130,27 @@ template<> EIGEN_DONT_INLINE Packet4f 
ei_plog(Packet4f x) return _mm_or_ps(x, invalid_mask); // negative arg will be NAN } -_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f); -_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); - -_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4); - -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1); -_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1); - template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x) { + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); + + + _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f); + _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); + + _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4); + + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1); + Packet4f tmp = _mm_setzero_ps(), fx; Packet4i emm0; @@ -202,19 +203,29 @@ template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x) surprising but correct result. 
*/ -_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625); -_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4); -_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8); -_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4); -_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3); -_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1); -_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); -_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); -_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); -_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI - template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x) { + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet4i(1, 1); + _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); + _EIGEN_DECLARE_CONST_Packet4i(2, 2); + _EIGEN_DECLARE_CONST_Packet4i(4, 4); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); + + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); + _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI + Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y; Packet4i emm0, emm2; @@ -291,6 +302,25 @@ template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x) /* almost the same as ei_psin */ template<> Packet4f ei_pcos(Packet4f x) { + 
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet4i(1, 1); + _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); + _EIGEN_DECLARE_CONST_Packet4i(2, 2); + _EIGEN_DECLARE_CONST_Packet4i(4, 4); + + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); + _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI + Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y; Packet4i emm0, emm2; diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index c1aee498a..1b377b1d9 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -65,7 +65,7 @@ inline void ei_handmade_aligned_free(void *ptr) } /** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment. - * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown. + * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown. */ inline void* ei_aligned_malloc(size_t size) { @@ -98,7 +98,7 @@ inline void* ei_aligned_malloc(size_t size) } /** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. - * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown. 
+ * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown. */ template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size) {