Bugzilla – Attachment 554 Details for
Bug 973
vectorization_logic fails on AVX
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This Bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
[patch]
re-enable vectorization of Vector4i and alignment of Vector4f
mixed_alignment_requirements.diff (text/plain), 14.27 KB, created by
Gael Guennebaud
on 2015-03-09 16:08:19 UTC
(
hide
)
Description:
re-enable vectorization of Vector4i and alignment of Vector4f
Filename:
MIME Type:
Creator:
Gael Guennebaud
Created:
2015-03-09 16:08:19 UTC
Size:
14.27 KB
patch
obsolete
>diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h >--- a/Eigen/src/Core/CoreEvaluators.h >+++ b/Eigen/src/Core/CoreEvaluators.h >@@ -642,21 +642,25 @@ struct evaluator<Map<PlainObjectType, Ma > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 > ? int(PlainObjectType::OuterStrideAtCompileTime) > : int(StrideType::OuterStrideAtCompileTime), > HasNoInnerStride = InnerStrideAtCompileTime == 1, > HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, > HasNoStride = HasNoInnerStride && HasNoOuterStride, > IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), > IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, >+ >+ // TODO: should check for smaller packet types once we can handle multi-sized packet types >+ AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), >+ > KeepsPacketAccess = bool(HasNoInnerStride) > && ( bool(IsDynamicSize) > || HasNoOuterStride > || ( OuterStrideAtCompileTime!=Dynamic >- && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), >+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), > Flags0 = evaluator<PlainObjectType>::Flags, > Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), > Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) > ? int(Flags1) : int(Flags1 & ~LinearAccessBit), > Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) > }; > > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) >@@ -712,17 +716,20 @@ struct evaluator<Block<ArgType, BlockRow > : int(outer_stride_at_compile_time<ArgType>::ret), > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType > ? int(outer_stride_at_compile_time<ArgType>::ret) > : int(inner_stride_at_compile_time<ArgType>::ret), > MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) > && (InnerStrideAtCompileTime == 1) > ? 
PacketAccessBit : 0, > >- MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, >+ // TODO: should check for smaller packet types once we can handle multi-sized packet types >+ AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), >+ >+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0, > FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, > FlagsRowMajorBit = XprType::Flags&RowMajorBit, > Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) | > DirectAccessBit | > MaskPacketAccessBit | > MaskAlignedBit), > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit > }; >@@ -820,22 +827,25 @@ protected: > // all action is via the data() as returned by the Block expression. 
> > template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> > struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true> > : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, > typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject> > { > typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; >+ typedef typename XprType::Scalar Scalar; > > EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) > : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) > { >+ // TODO: should check for smaller packet types once we can handle multi-sized packet types >+ const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar); > // FIXME this should be an internal assertion >- eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); >+ eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); > } > }; > > > // -------------------- Select -------------------- > // TODO shall we introduce a ternary_evaluator? 
> > // TODO enable vectorization for Select >diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h >--- a/Eigen/src/Core/DenseStorage.h >+++ b/Eigen/src/Core/DenseStorage.h >@@ -29,24 +29,46 @@ EIGEN_DEVICE_FUNC > void check_static_allocation_size() > { > // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit > #if EIGEN_STACK_ALLOCATION_LIMIT > EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); > #endif > } > >+template<typename T, int Size, typename Packet = typename packet_traits<T>::type, >+ bool Match = bool((Size%unpacket_traits<Packet>::size)==0), >+ bool TryHalf = bool(unpacket_traits<Packet>::size > Size) >+ && bool(unpacket_traits<Packet>::size > unpacket_traits<typename unpacket_traits<Packet>::half>::size) > >+struct compute_default_alignment >+{ >+ enum { value = 0 }; >+}; >+ >+template<typename T, int Size, typename Packet> >+struct compute_default_alignment<T, Size, Packet, true, false> // Match >+{ >+ enum { value = sizeof(T) * unpacket_traits<Packet>::size }; >+}; >+ >+template<typename T, int Size, typename Packet> >+struct compute_default_alignment<T, Size, Packet, false, true> >+{ >+ // current packet too large, try with an half-packet >+ enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value }; >+}; >+ > /** \internal > * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: > * to 16 bytes boundary if the total size is a multiple of 16 bytes. > */ > template <typename T, int Size, int MatrixOrArrayOptions, > int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0 >- : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES >- : 0 > >+ : compute_default_alignment<T,Size>::value > >+// : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? 
EIGEN_ALIGN_BYTES : 0 > > struct plain_array > { > T array[Size]; > > EIGEN_DEVICE_FUNC > plain_array() > { > check_static_allocation_size<T,Size>(); >@@ -76,24 +98,81 @@ struct plain_array > #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ > eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \ > && "this assertion is explained here: " \ > "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ > " **** READ THIS WEB PAGE !!! ****"); > #endif > > template <typename T, int Size, int MatrixOrArrayOptions> >-struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES> >+struct plain_array<T, Size, MatrixOrArrayOptions, 8> > { >- EIGEN_USER_ALIGN_DEFAULT T array[Size]; >+ EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; > > EIGEN_DEVICE_FUNC > plain_array() > { >- EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1); >+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); >+ check_static_allocation_size<T,Size>(); >+ } >+ >+ EIGEN_DEVICE_FUNC >+ plain_array(constructor_without_unaligned_array_assert) >+ { >+ check_static_allocation_size<T,Size>(); >+ } >+}; >+ >+template <typename T, int Size, int MatrixOrArrayOptions> >+struct plain_array<T, Size, MatrixOrArrayOptions, 16> >+{ >+ EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; >+ >+ EIGEN_DEVICE_FUNC >+ plain_array() >+ { >+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); >+ check_static_allocation_size<T,Size>(); >+ } >+ >+ EIGEN_DEVICE_FUNC >+ plain_array(constructor_without_unaligned_array_assert) >+ { >+ check_static_allocation_size<T,Size>(); >+ } >+}; >+ >+template <typename T, int Size, int MatrixOrArrayOptions> >+struct plain_array<T, Size, MatrixOrArrayOptions, 32> >+{ >+ EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; >+ >+ EIGEN_DEVICE_FUNC >+ plain_array() >+ { >+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); >+ check_static_allocation_size<T,Size>(); >+ } >+ >+ EIGEN_DEVICE_FUNC >+ plain_array(constructor_without_unaligned_array_assert) >+ { >+ check_static_allocation_size<T,Size>(); >+ } >+}; >+ 
>+template <typename T, int Size, int MatrixOrArrayOptions> >+struct plain_array<T, Size, MatrixOrArrayOptions, 64> >+{ >+ EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; >+ >+ EIGEN_DEVICE_FUNC >+ plain_array() >+ { >+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); > check_static_allocation_size<T,Size>(); > } > > EIGEN_DEVICE_FUNC > plain_array(constructor_without_unaligned_array_assert) > { > check_static_allocation_size<T,Size>(); > } >diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h >--- a/Eigen/src/Core/util/Macros.h >+++ b/Eigen/src/Core/util/Macros.h >@@ -313,16 +313,19 @@ > #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 > #else > #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 > #endif > > // Defined the boundary (in bytes) on which the data needs to be aligned. Note > // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be > // aligned at all regardless of the value of this #define. >+// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES, >+// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough, >+// and 16 bytes alignment is also enough for Vector4f. > #define EIGEN_ALIGN_BYTES 16 > > #ifdef EIGEN_DONT_ALIGN > #ifndef EIGEN_DONT_ALIGN_STATICALLY > #define EIGEN_DONT_ALIGN_STATICALLY > #endif > #define EIGEN_ALIGN 0 > #elif !defined(EIGEN_DONT_VECTORIZE) >diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h >--- a/Eigen/src/Core/util/XprHelper.h >+++ b/Eigen/src/Core/util/XprHelper.h >@@ -154,23 +154,26 @@ class compute_matrix_flags > }; > > template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> > class compute_matrix_evaluator_flags > { > enum { > row_major_bit = Options&RowMajor ? 
RowMajorBit : 0, > is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, >+ >+ // TODO: should check for smaller packet types once we can handle multi-sized packet types >+ align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), > > aligned_bit = > ( > ((Options&DontAlign)==0) > && ( > #if EIGEN_ALIGN_STATICALLY >- ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) >+ ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) > #else > 0 > #endif > > || > > #if EIGEN_ALIGN > is_dynamic_size_storage >diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp >--- a/test/unalignedassert.cpp >+++ b/test/unalignedassert.cpp >@@ -76,17 +76,17 @@ void construct_at_boundary(int boundary) > T *x = ::new(reinterpret_cast<void*>(_buf)) T; > x[0].setZero(); // just in order to silence warnings > x->~T(); > } > #endif > > void unalignedassert() > { >- #if EIGEN_ALIGN_STATICALLY >+#if EIGEN_ALIGN_STATICALLY > construct_at_boundary<Vector2f>(4); > construct_at_boundary<Vector3f>(4); > construct_at_boundary<Vector4f>(16); > construct_at_boundary<Matrix2f>(16); > construct_at_boundary<Matrix3f>(4); > construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES); > > construct_at_boundary<Vector2d>(16); >@@ -95,33 +95,34 @@ void unalignedassert() > construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES); > construct_at_boundary<Matrix3d>(4); > construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES); > > construct_at_boundary<Vector2cf>(16); > construct_at_boundary<Vector3cf>(4); > construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES); > construct_at_boundary<Vector3cd>(16); >- #endif >+#endif > > check_unalignedassert_good<TestNew1>(); > check_unalignedassert_good<TestNew2>(); > check_unalignedassert_good<TestNew3>(); > > check_unalignedassert_good<TestNew4>(); > check_unalignedassert_good<TestNew5>(); > check_unalignedassert_good<TestNew6>(); > check_unalignedassert_good<Depends<true> >(); > > #if 
EIGEN_ALIGN_STATICALLY >- if(EIGEN_ALIGN_BYTES==16) >+ if(EIGEN_ALIGN_BYTES>=16) > { > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8)); >+ VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8)); > } > for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8) > { > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b)); >diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp >--- a/test/vectorization_logic.cpp >+++ b/test/vectorization_logic.cpp >@@ -209,17 +209,17 @@ template<typename Scalar, bool Enable = > >(InnerVectorizedTraversal,CompleteUnrolling))); > > VERIFY((test_assign< > Map<Matrix<Scalar,EIGEN_PLAIN_ENUM_MAX(2,PacketSize),EIGEN_PLAIN_ENUM_MAX(2,PacketSize)>, Aligned, InnerStride<3*PacketSize> >, > Matrix<Scalar,EIGEN_PLAIN_ENUM_MAX(2,PacketSize),EIGEN_PLAIN_ENUM_MAX(2,PacketSize)> > >(DefaultTraversal,CompleteUnrolling))); > > VERIFY((test_assign(Matrix11(), Matrix<Scalar,PacketSize,EIGEN_PLAIN_ENUM_MIN(2,PacketSize)>()*Matrix<Scalar,EIGEN_PLAIN_ENUM_MIN(2,PacketSize),PacketSize>(), >- PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling))); >+ InnerVectorizedTraversal, CompleteUnrolling))); > #endif > > VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3), > SliceVectorizedTraversal,NoUnrolling)); > > VERIFY(test_redux(VectorX(10), > LinearVectorizedTraversal,NoUnrolling)); >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 973
: 554 |
593