Add bench_reverse and a draft of the reverse vectorization for AltiVec;
make the global Scaling functions static
This commit is contained in:
parent
f5d96df800
commit
cc90495e30
@ -111,7 +111,7 @@ template<> inline v4i ei_pmul(const v4i& a, const v4i& b)
|
|||||||
USE_CONST_v1i;
|
USE_CONST_v1i;
|
||||||
USE_CONST_v16i_;
|
USE_CONST_v16i_;
|
||||||
|
|
||||||
// Get the absolute values
|
// Get the absolute values
|
||||||
a1 = vec_abs(a);
|
a1 = vec_abs(a);
|
||||||
b1 = vec_abs(b);
|
b1 = vec_abs(b);
|
||||||
|
|
||||||
@ -146,7 +146,7 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) {
|
|||||||
|
|
||||||
// Altivec does not offer a divide instruction, we have to do a reciprocal approximation
|
// Altivec does not offer a divide instruction, we have to do a reciprocal approximation
|
||||||
y_0 = vec_re(b);
|
y_0 = vec_re(b);
|
||||||
|
|
||||||
// Do one Newton-Raphson iteration to get the needed accuracy
|
// Do one Newton-Raphson iteration to get the needed accuracy
|
||||||
t = vec_nmsub(y_0, b, v1f);
|
t = vec_nmsub(y_0, b, v1f);
|
||||||
y_1 = vec_madd(y_0, t, y_0);
|
y_1 = vec_madd(y_0, t, y_0);
|
||||||
@ -260,6 +260,17 @@ template<> inline int ei_pfirst(const v4i& a)
|
|||||||
return ai[0];
|
return ai[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal
  * \returns a packet holding the four 32-bit coefficients of \a a in reversed order.
  *
  * The reversal is a single byte permute of \a a with itself: the control vector
  * picks bytes 12..15 first, then 8..11, 4..7 and 0..3, i.e. the four 4-byte lanes
  * from last to first while keeping the byte order inside each lane.
  */
template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a)
{
  // vec_perm takes its control operand as a vector of unsigned char, so the mask is
  // declared as a vector (which is 16-byte aligned by construction) rather than as a
  // scalar unsigned char carrying an alignment attribute.
  static const __vector unsigned char reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
  return (v4f)vec_perm((__vector unsigned char)a, (__vector unsigned char)a, reverse_mask);
}
|
||||||
|
/** \internal
  * \returns a packet holding the four 32-bit integer coefficients of \a a in reversed order.
  *
  * The reversal is a single byte permute of \a a with itself: the control vector
  * picks bytes 12..15 first, then 8..11, 4..7 and 0..3, i.e. the four 4-byte lanes
  * from last to first while keeping the byte order inside each lane.
  */
template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a)
{
  // vec_perm takes its control operand as a vector of unsigned char, so the mask is
  // declared as a vector (which is 16-byte aligned by construction) rather than as a
  // scalar unsigned char carrying an alignment attribute.
  static const __vector unsigned char reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
  return (v4i)vec_perm((__vector unsigned char)a, (__vector unsigned char)a, reverse_mask);
}
|
||||||
|
|
||||||
inline v4f ei_preduxp(const v4f* vecs)
|
inline v4f ei_preduxp(const v4f* vecs)
|
||||||
{
|
{
|
||||||
v4f v[4], sum[4];
|
v4f v[4], sum[4];
|
||||||
|
|||||||
@ -120,28 +120,28 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
/** Constructs a uniform scaling from scale factor \a s */
|
/** Constructs a uniform scaling from scale factor \a s */
|
||||||
inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
|
static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
|
||||||
/** Constructs a uniform scaling from scale factor \a s */
|
/** Constructs a uniform scaling from scale factor \a s */
|
||||||
inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
|
static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
|
||||||
/** Constructs a uniform scaling from scale factor \a s */
|
/** Constructs a uniform scaling from scale factor \a s */
|
||||||
template<typename RealScalar> inline UniformScaling<std::complex<RealScalar> >
|
template<typename RealScalar>
|
||||||
Scaling(const std::complex<RealScalar>& s)
|
static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
|
||||||
{ return UniformScaling<std::complex<RealScalar> >(s); }
|
{ return UniformScaling<std::complex<RealScalar> >(s); }
|
||||||
|
|
||||||
/** Constructs a 2D axis aligned scaling */
|
/** Constructs a 2D axis aligned scaling */
|
||||||
template<typename Scalar> inline DiagonalMatrix<Scalar,2>
|
template<typename Scalar>
|
||||||
Scaling(Scalar sx, Scalar sy)
|
static inline DiagonalMatrix<Scalar,2> Scaling(Scalar sx, Scalar sy)
|
||||||
{ return DiagonalMatrix<Scalar,2>(sx, sy); }
|
{ return DiagonalMatrix<Scalar,2>(sx, sy); }
|
||||||
/** Constructs a 3D axis aligned scaling */
|
/** Constructs a 3D axis aligned scaling */
|
||||||
template<typename Scalar> inline DiagonalMatrix<Scalar,3>
|
template<typename Scalar>
|
||||||
Scaling(Scalar sx, Scalar sy, Scalar sz)
|
static inline DiagonalMatrix<Scalar,3> Scaling(Scalar sx, Scalar sy, Scalar sz)
|
||||||
{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
|
{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
|
||||||
|
|
||||||
/** Constructs an axis aligned scaling expression from vector expression \a coeffs
|
/** Constructs an axis aligned scaling expression from vector expression \a coeffs
|
||||||
* This is an alias for coeffs.asDiagonal()
|
* This is an alias for coeffs.asDiagonal()
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
|
static inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
|
||||||
{ return coeffs.asDiagonal(); }
|
{ return coeffs.asDiagonal(); }
|
||||||
|
|
||||||
/** \addtogroup Geometry_Module */
|
/** \addtogroup Geometry_Module */
|
||||||
|
|||||||
83
bench/bench_reverse.cpp
Normal file
83
bench/bench_reverse.cpp
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
|
||||||
|
#include <Eigen/Array>
|
||||||
|
#include <bench/BenchUtil.h>
|
||||||
|
using namespace Eigen;
|
||||||
|
|
||||||
|
#ifndef REPEAT
|
||||||
|
#define REPEAT 100000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TRIES
|
||||||
|
#define TRIES 20
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef double Scalar;
|
||||||
|
|
||||||
|
template <typename MatrixType>
|
||||||
|
__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
|
||||||
|
{
|
||||||
|
int rows = m.rows();
|
||||||
|
int cols = m.cols();
|
||||||
|
int size = m.size();
|
||||||
|
|
||||||
|
int repeats = (REPEAT*1000)/size;
|
||||||
|
MatrixType a = MatrixType::Random(rows,cols);
|
||||||
|
MatrixType b = MatrixType::Random(rows,cols);
|
||||||
|
|
||||||
|
BenchTimer timerB, timerH, timerV;
|
||||||
|
|
||||||
|
Scalar acc = 0;
|
||||||
|
int r = ei_random<int>(0,rows-1);
|
||||||
|
int c = ei_random<int>(0,cols-1);
|
||||||
|
for (int t=0; t<TRIES; ++t)
|
||||||
|
{
|
||||||
|
timerB.start();
|
||||||
|
for (int k=0; k<repeats; ++k)
|
||||||
|
{
|
||||||
|
asm("#begin foo");
|
||||||
|
b = a.reverse();
|
||||||
|
asm("#end foo");
|
||||||
|
acc += b.coeff(r,c);
|
||||||
|
}
|
||||||
|
timerB.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (MatrixType::RowsAtCompileTime==Dynamic)
|
||||||
|
std::cout << "dyn ";
|
||||||
|
else
|
||||||
|
std::cout << "fixed ";
|
||||||
|
std::cout << rows << " x " << cols << " \t"
|
||||||
|
<< (timerB.value() * REPEAT) / repeats << "s "
|
||||||
|
<< "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";
|
||||||
|
|
||||||
|
std::cout << "\n";
|
||||||
|
// make sure the compiler does not optimize too much
|
||||||
|
if (acc==123)
|
||||||
|
std::cout << acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
|
||||||
|
std::cout << "size no sqrt standard";
|
||||||
|
// #ifdef BENCH_GSL
|
||||||
|
// std::cout << " GSL (standard + double + ATLAS) ";
|
||||||
|
// #endif
|
||||||
|
std::cout << "\n";
|
||||||
|
for (uint i=0; dynsizes[i]>0; ++i)
|
||||||
|
{
|
||||||
|
bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
|
||||||
|
bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
|
||||||
|
}
|
||||||
|
// bench_reverse(Matrix<Scalar,2,2>());
|
||||||
|
// bench_reverse(Matrix<Scalar,3,3>());
|
||||||
|
// bench_reverse(Matrix<Scalar,4,4>());
|
||||||
|
// bench_reverse(Matrix<Scalar,5,5>());
|
||||||
|
// bench_reverse(Matrix<Scalar,6,6>());
|
||||||
|
// bench_reverse(Matrix<Scalar,7,7>());
|
||||||
|
// bench_reverse(Matrix<Scalar,8,8>());
|
||||||
|
// bench_reverse(Matrix<Scalar,12,12>());
|
||||||
|
// bench_reverse(Matrix<Scalar,16,16>());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user