From 49fc1e3e84d036c43ffbac0128588d47c5a11f36 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Mar 2009 14:41:46 +0000 Subject: [PATCH] add vectorization of sqrt for float --- Eigen/Core | 2 +- Eigen/src/Array/Functors.h | 8 ++++++- Eigen/src/Core/GenericPacketMath.h | 4 ++++ ...nscendentalFunctions.h => MathFunctions.h} | 23 +++++++++++-------- Eigen/src/Core/arch/SSE/PacketMath.h | 3 ++- test/packetmath.cpp | 1 + 6 files changed, 29 insertions(+), 12 deletions(-) rename Eigen/src/Core/arch/SSE/{TranscendentalFunctions.h => MathFunctions.h} (95%) diff --git a/Eigen/Core b/Eigen/Core index ad85b6595..904c6089b 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -102,7 +102,7 @@ namespace Eigen { #if defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" - #include "src/Core/arch/SSE/TranscendentalFunctions.h" + #include "src/Core/arch/SSE/MathFunctions.h" #elif defined EIGEN_VECTORIZE_ALTIVEC #include "src/Core/arch/AltiVec/PacketMath.h" #endif diff --git a/Eigen/src/Array/Functors.h b/Eigen/src/Array/Functors.h index 9759ebf2a..f5607ba90 100644 --- a/Eigen/src/Array/Functors.h +++ b/Eigen/src/Array/Functors.h @@ -58,10 +58,16 @@ struct ei_functor_traits > */ template struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT { inline const Scalar operator() (const Scalar& a) const { return ei_sqrt(a); } + typedef typename ei_packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return ei_psqrt(a); } }; template struct ei_functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; +{ enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = ei_packet_traits::HasSqrt + }; +}; /** \internal * diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 8583faa4a..0b5144632 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -46,6 +46,7 @@ struct ei_default_packet_traits HasMax = 1, HasDiv = 0, + HasSqrt = 0, HasExp = 0, HasLog = 0, HasPow = 0, @@ -192,6 +193,9 @@ template inline Packet ei_pexp(Packet a) { return ei_exp(a); } /** \internal \returns the log of \a a (coeff-wise) */ template inline Packet ei_plog(Packet a) { return ei_log(a); } +/** \internal \returns the square-root of \a a (coeff-wise) */ +template inline Packet ei_psqrt(Packet a) { return ei_log(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/arch/SSE/TranscendentalFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h similarity index 95% rename from Eigen/src/Core/arch/SSE/TranscendentalFunctions.h rename to Eigen/src/Core/arch/SSE/MathFunctions.h index 0c8accc17..7df9dc659 100644 --- a/Eigen/src/Core/arch/SSE/TranscendentalFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -23,9 +23,9 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -/* The functions of this file come from Julien Pommier's sse math library. - * which is itself inspired by Intel Approximate Math library, and based on the - * corresponding algorithms of the cephes math library. +/* The sin, cos, exp, and log functions of this file come from Julien Pommier's sse + * math library, which is itself inspired by Intel Approximate Math library, + * and based on the corresponding algorithms of the cephes math library. */ /* Copyright (C) 2007 Julien Pommier @@ -49,18 +49,16 @@ (this is the zlib license) */ -#ifndef EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H -#define EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H +#ifndef EIGEN_MATH_FUNCTIONS_SSE_H +#define EIGEN_MATH_FUNCTIONS_SSE_H _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0); _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); /* the smallest non denormalized float number */ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); -// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(mant_mask, 0x7f800000); _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); -// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_sign_mask, ~0x80000000); _EIGEN_DECLARE_CONST_Packet4i(1, 1); _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); @@ -214,7 +212,6 @@ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005); _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003); _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002); _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI -_EIGEN_DECLARE_CONST_Packet4f(2pi, 2.*M_PI); template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x) { @@ -358,4 +355,12 @@ template<> Packet4f ei_pcos(Packet4f x) return _mm_xor_ps(y, sign_bit); } -#endif // EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H +template<> Packet4f ei_psqrt(Packet4f _x) +{ + Packet4f half = ei_pmul(_x, ei_pset1(.5f)); + Packet4f x = _mm_rsqrt_ps(_x); + x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); + return ei_pmul(_x,x); +} + +#endif // EIGEN_MATH_FUNCTIONS_SSE_H diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index d49479c16..cb8052a4b 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -61,7 +61,8 @@ template<> struct ei_packet_traits : ei_default_packet_traits HasSin = 1, HasCos = 1, HasLog = 1, - HasExp = 1 + HasExp = 1, + HasSqrt = 1 }; }; template<> struct ei_packet_traits : ei_default_packet_traits diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 858bbb9c3..250398241 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -227,6 +227,7 @@ template void packetmath_real() data2[i] = ei_random(0,1e6); } CHECK_CWISE1_IF(ei_packet_traits::HasLog, ei_log, ei_plog); + CHECK_CWISE1_IF(ei_packet_traits::HasSqrt, ei_sqrt, ei_psqrt); } void test_packetmath()