Rename predux_downto4 to predux_half_dowto4 so that the name describes its semantics more accurately.
This commit is contained in:
parent
67bac6368c
commit
584951ca4d
@ -324,13 +324,13 @@ preduxp(const Packet* vecs) { return vecs[0]; }
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
|
||||
/** \internal \returns the element-wise sum of the lower and upper halves of \a a if the size of \a a is larger than 4.
|
||||
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
||||
* For packet sizes smaller than or equal to 4, this boils down to a no-op.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
||||
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
||||
predux_downto4(const Packet& a)
|
||||
predux_half_dowto4(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the product of the elements of \a a*/
|
||||
|
||||
@ -412,7 +412,7 @@ template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
|
||||
return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux_half_dowto4<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
|
||||
}
|
||||
|
||||
@ -888,7 +888,7 @@ EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
|
||||
EIGEN_STRONG_INLINE Packet8f predux_half_dowto4<Packet16f>(const Packet16f& a) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||
__m256 lane0 = _mm512_extractf32x8_ps(a, 0);
|
||||
__m256 lane1 = _mm512_extractf32x8_ps(a, 1);
|
||||
@ -904,7 +904,7 @@ EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
|
||||
#endif
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
|
||||
EIGEN_STRONG_INLINE Packet4d predux_half_dowto4<Packet8d>(const Packet8d& a) {
|
||||
__m256d lane0 = _mm512_extractf64x4_pd(a, 0);
|
||||
__m256d lane1 = _mm512_extractf64x4_pd(a, 1);
|
||||
__m256d res = _mm256_add_pd(lane0, lane1);
|
||||
|
||||
@ -580,7 +580,7 @@ DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Pack
|
||||
}
|
||||
|
||||
template<typename Packet>
|
||||
const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
|
||||
const DoublePacket<Packet>& predux_half_dowto4(const DoublePacket<Packet> &a)
|
||||
{
|
||||
return a;
|
||||
}
|
||||
@ -1596,13 +1596,13 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
SRhsPacketHalf b0;
|
||||
straits.loadLhsUnaligned(blB, a0);
|
||||
straits.loadRhs(blA, b0);
|
||||
SAccPacketHalf c0 = predux_downto4(C0);
|
||||
SAccPacketHalf c0 = predux_half_dowto4(C0);
|
||||
straits.madd(a0,b0,c0,b0);
|
||||
straits.acc(c0, alphav, R);
|
||||
}
|
||||
else
|
||||
{
|
||||
straits.acc(predux_downto4(C0), alphav, R);
|
||||
straits.acc(predux_half_dowto4(C0), alphav, R);
|
||||
}
|
||||
res.scatterPacket(i, j2, R);
|
||||
}
|
||||
|
||||
@ -253,8 +253,8 @@ template<typename Scalar> void packetmath()
|
||||
ref[i] = 0;
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
ref[i%HalfPacketSize] += data1[i];
|
||||
internal::pstore(data2, internal::predux_downto4(internal::pload<Packet>(data1)));
|
||||
VERIFY(areApprox(ref, data2, HalfPacketSize) && "internal::predux_downto4");
|
||||
internal::pstore(data2, internal::predux_half_dowto4(internal::pload<Packet>(data1)));
|
||||
VERIFY(areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4");
|
||||
}
|
||||
|
||||
ref[0] = 1;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user