Bugzilla – Attachment 443 Details for Bug 692: Meta-Packets
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
[patch]
Patch against the latest version of the codebase that introduces the notion of MetaPacket as well as the broadcast_from() initializer method.
patch_692.txt (text/plain), 10.19 KB, created by
Benoit Steiner
on 2014-03-26 19:34:00 UTC
(
hide
)
Description:
Patch against the latest version of the codebase that introduces the notion of MetaPacket as well as the broadcast_from() initializer method.
Filename:
patch_692.txt
MIME Type:
text/plain
Creator:
Benoit Steiner
Created:
2014-03-26 19:34:00 UTC
Size:
10.19 KB
patch
obsolete
># HG changeset patch ># User Benoit Steiner <benoit.steiner.goog@gmail.com> ># Date 1395854226 25200 ># Node ID abfb73434fba38c2bd059c95b5d10ccaf4e36c3f ># Parent 7cbf8f5a9bdfb5119ebfd9ae607351be4180fdfd >Introduced the MetaPacket class, which can be used to process several packets together. >Created the broadcast_from primitive to efficiently initialize the content of a metapacket from memory. > >diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h >--- a/Eigen/src/Core/GenericPacketMath.h >+++ b/Eigen/src/Core/GenericPacketMath.h >@@ -347,14 +347,30 @@ inline void palign(PacketType& first, co > template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) > { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } > > template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) > { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } > > #endif > >+ >+/*************************************************************************** >+ * MetaPacket, that is a collection of N packets. >+ * For performance reasons, N should be a small multiple of the packet size. >+***************************************************************************/ >+template <typename Packet, int N> struct MetaPacket { >+ Packet packet[N]; >+ >+ // Initializes (through a pset1 operation) the ith packet with the value at from[i]. >+ // The from address must be aligned. 
>+ EIGEN_STRONG_INLINE void broadcast_from(const typename unpacket_traits<Packet>::type* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = from[i]; >+ } >+ } >+}; >+ > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_GENERIC_PACKET_MATH_H >- >diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h >--- a/Eigen/src/Core/arch/AltiVec/Complex.h >+++ b/Eigen/src/Core/arch/AltiVec/Complex.h >@@ -205,13 +205,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf > return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV)))); > } > > template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x) > { > return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); > } > >+template<int N> struct MetaPacket<Packet2cf, N> { >+ Packet2cf packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const std::complex<float>* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pset1<Packet2cf>(from+i); >+ } >+ } >+}; >+ > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_COMPLEX_ALTIVEC_H >diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h >--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h >+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h >@@ -489,13 +489,33 @@ struct palign_impl<Offset,Packet4i> > { > static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) > { > if (Offset!=0) > first = vec_sld(first, second, Offset*4); > } > }; > >+template<int N> struct MetaPacket<Packet4f, N> { >+ Packet4f packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const float* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pset1<Packet4f>(from+i); >+ } >+ } >+}; >+ >+template<int N> struct MetaPacket<Packet4i, N> { >+ Packet4i packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const int* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pset1<Packet4i>(from+i); >+ } >+ } >+}; >+ > } // end 
namespace internal > > } // end namespace Eigen > > #endif // EIGEN_PACKET_MATH_ALTIVEC_H >diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h >--- a/Eigen/src/Core/arch/NEON/Complex.h >+++ b/Eigen/src/Core/arch/NEON/Complex.h >@@ -241,13 +241,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf > > // this computes the norm > s = vmulq_f32(b.v, b.v); > rev_s = vrev64q_f32(s); > > return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); > } > >+template<int N> struct MetaPacket<Packet2cf, N> { >+ Packet2cf packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const std::complex<float>* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pset1<Packet2cf>(from+i); >+ } >+ } >+}; >+ > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_COMPLEX_NEON_H >diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h >--- a/Eigen/src/Core/arch/NEON/PacketMath.h >+++ b/Eigen/src/Core/arch/NEON/PacketMath.h >@@ -408,13 +408,35 @@ PALIGN_NEON(2,Packet4f,vextq_f32) > PALIGN_NEON(3,Packet4f,vextq_f32) > PALIGN_NEON(0,Packet4i,vextq_s32) > PALIGN_NEON(1,Packet4i,vextq_s32) > PALIGN_NEON(2,Packet4i,vextq_s32) > PALIGN_NEON(3,Packet4i,vextq_s32) > > #undef PALIGN_NEON > >+ >+template<int N> struct MetaPacket<Packet4f, N> { >+ Packet4f packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const float* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pset1<Packet4f>(from+i); >+ } >+ } >+}; >+ >+template<int N> struct MetaPacket<Packet4i, N> { >+ Packet4i packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const int* from) { >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pset1<Packet4i>(from+i); >+ } >+ } >+}; >+ >+ > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_PACKET_MATH_NEON_H >diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h >--- a/Eigen/src/Core/arch/SSE/Complex.h >+++ b/Eigen/src/Core/arch/SSE/Complex.h >@@ -430,13 
+430,43 @@ template<> EIGEN_STRONG_INLINE Packet1cd > return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); > } > > EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) > { > return Packet1cd(preverse(x.v)); > } > >+template<int N> struct MetaPacket<Packet2cf, N> { >+ Packet2cf packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const std::complex<float>* from) { >+ EIGEN_ASM_COMMENT("Start broadcast_from<Packet2cf>"); >+ const int k = N/2; >+ for (int i = 0; i < k*2; i+=2) { >+ packet[i+1] = pload<Packet2cf>(from+i); >+ packet[i+0].v = vec4f_swizzle1(packet[i+1].v, 0,1,0,1); >+ packet[i+1].v = vec4f_swizzle1(packet[i+1].v, 2,3,2,3); >+ } >+ for (int i = k*2; i < N; ++i) { >+ packet[i] = pload1<Packet2cf>(from+i); >+ } >+ EIGEN_ASM_COMMENT("Done broadcast_from<Packet2cf>"); >+ } >+}; >+ >+template<int N> struct MetaPacket<Packet1cd, N> { >+ Packet1cd packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const std::complex<double>* from) { >+ EIGEN_ASM_COMMENT("Start broadcast_from<Packet1cd>"); >+ for (int i = 0; i < N; ++i) { >+ packet[i] = pload1<Packet1cd>(from+i); >+ } >+ EIGEN_ASM_COMMENT("Done broadcast_from<Packet1cd>"); >+ } >+}; >+ > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_COMPLEX_SSE_H >diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h >--- a/Eigen/src/Core/arch/SSE/PacketMath.h >+++ b/Eigen/src/Core/arch/SSE/PacketMath.h >@@ -658,13 +658,71 @@ struct palign_impl<Offset,Packet2d> > { > first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first))); > first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second))); > } > } > }; > #endif > >+template<int N> struct MetaPacket<Packet4f, N> { >+ Packet4f packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const float* from) { >+ EIGEN_ASM_COMMENT("Start broadcast_from<Packet4f>"); >+ const int k = N/4; >+ for (int i = 0; i < k*4; i+=4) { >+ 
packet[i+3] = pload<Packet4f>(from+i); >+ packet[i+0] = vec4f_swizzle1(packet[i+3], 0,0,0,0); >+ packet[i+1] = vec4f_swizzle1(packet[i+3], 1,1,1,1); >+ packet[i+2] = vec4f_swizzle1(packet[i+3], 2,2,2,2); >+ packet[i+3] = vec4f_swizzle1(packet[i+3], 3,3,3,3); >+ } >+ for (int i = k*4; i < N; ++i) { >+ packet[i] = pload1<Packet4f>(from+i); >+ } >+ EIGEN_ASM_COMMENT("Done broadcast_from<Packet4f>"); >+ } >+}; >+ >+template<int N> struct MetaPacket<Packet2d, N> { >+ Packet2d packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const double* from) { >+ EIGEN_ASM_COMMENT("Start broadcast_from<Packet2d"); >+ const int k = N/2; >+ for (int i = 0; i < k*2; i+=2) { >+ packet[i+1] = pload<Packet2d>(from+i); >+ packet[i+0] = vec2d_swizzle1(packet[i+1], 0,0); >+ packet[i+1] = vec2d_swizzle1(packet[i+1], 1,1); >+ } >+ for (int i = k*2; i < N; ++i) { >+ packet[i] = pload1<Packet2d>(from+i); >+ } >+ EIGEN_ASM_COMMENT("Done broadcast_from<Packet2d>"); >+ } >+}; >+ >+template<int N> struct MetaPacket<Packet4i, N> { >+ Packet4i packet[N]; >+ >+ EIGEN_STRONG_INLINE void broadcast_from(const int* from) { >+ EIGEN_ASM_COMMENT("Start broadcast_from<Packet4i>"); >+ const int k = N/4; >+ for (int i = 0; i < k*4; i+=4) { >+ packet[i+3] = pload<Packet4i>(from+i); >+ packet[i+0] = vec4i_swizzle1(packet[i+3], 0,0,0,0); >+ packet[i+1] = vec4i_swizzle1(packet[i+3], 1,1,1,1); >+ packet[i+2] = vec4i_swizzle1(packet[i+3], 2,2,2,2); >+ packet[i+3] = vec4i_swizzle1(packet[i+3], 3,3,3,3); >+ } >+ for (int i = k*4; i < N; ++i) { >+ packet[i] = pload1<Packet4i>(from+i); >+ } >+ EIGEN_ASM_COMMENT("Done broadcast_from<Packet4i>"); >+ } >+}; >+ > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_PACKET_MATH_SSE_H >diff --git a/test/packetmath.cpp b/test/packetmath.cpp >--- a/test/packetmath.cpp >+++ b/test/packetmath.cpp >@@ -198,16 +198,25 @@ template<typename Scalar> void packetmat > } > internal::pstore(data2, internal::preduxp(packets)); > VERIFY(areApproxAbs(ref, data2, 
PacketSize, refvalue) && "internal::preduxp"); > > for (int i=0; i<PacketSize; ++i) > ref[i] = data1[PacketSize-i-1]; > internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1))); > VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse"); >+ >+ internal::MetaPacket<Packet, 2*PacketSize+1> meta; >+ meta.broadcast_from(data1); >+ for (int i = 0; i < PacketSize; ++i) { >+ internal::pstore(data2, meta.packet[i]); >+ for (int j = 0; j < PacketSize; ++j) { >+ VERIFY(isApproxAbs(data2[j], data1[i], refvalue)); >+ } >+ } > } > > template<typename Scalar> void packetmath_real() > { > using std::abs; > typedef typename internal::packet_traits<Scalar>::type Packet; > const int PacketSize = internal::packet_traits<Scalar>::size; >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on bug 692: 443 | 553