Bugzilla – Attachment 593 Details for
Bug 973
vectorization_logic fails on AVX
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
[patch]
Refactoring of the macro-level control of alignment.
macro-level_alignment_control.diff (text/plain), 43.76 KB, created by
Gael Guennebaud
on 2015-07-24 09:25:29 UTC
(
hide
)
Description:
Refactoring of the macro-level control of alignment.
Filename:
MIME Type:
Creator:
Gael Guennebaud
Created:
2015-07-24 09:25:29 UTC
Size:
43.76 KB
patch
obsolete
># HG changeset patch ># Parent b4218a6971a9de9dbdcd394dd08371e7848885a7 >Bug 973: update macro-level control of alignement by introducing user-controllable EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES macros. This changeset also removes EIGEN_ALIGN (replaced by EIGEN_MAX_ALIGN_BYTES>0), EIGEN_ALIGN_STATICALLY (replaced by EIGEN_MAX_STATIC_ALIGN_BYTES>0), EIGEN_USER_ALIGN*, EIGEN_ALIGN_DEFAULT (replaced by EIGEN_ALIGN_MAX). > >diff --git a/Eigen/Core b/Eigen/Core >--- a/Eigen/Core >+++ b/Eigen/Core >@@ -68,19 +68,19 @@ > #endif > > #include <complex> > > // this include file manages BLAS and MKL related macros > // and inclusion of their respective header files > #include "src/Core/util/MKL_support.h" > >-// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into >-// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks >-#if !EIGEN_ALIGN >+// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into >+// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks >+#if EIGEN_MAX_ALIGN_BYTES==0 > #ifndef EIGEN_DONT_VECTORIZE > #define EIGEN_DONT_VECTORIZE > #endif > #endif > > #if EIGEN_COMP_MSVC > #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled > #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later >diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h >--- a/Eigen/src/Core/CoreEvaluators.h >+++ b/Eigen/src/Core/CoreEvaluators.h >@@ -636,17 +636,17 @@ struct evaluator<Map<PlainObjectType, Ma > ? int(PlainObjectType::InnerStrideAtCompileTime) > : int(StrideType::InnerStrideAtCompileTime), > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 > ? 
int(PlainObjectType::OuterStrideAtCompileTime) > : int(StrideType::OuterStrideAtCompileTime), > HasNoInnerStride = InnerStrideAtCompileTime == 1, > HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, > HasNoStride = HasNoInnerStride && HasNoOuterStride, >- IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), >+ IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), > IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, > > // TODO: should check for smaller packet types once we can handle multi-sized packet types > AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), > > KeepsPacketAccess = bool(HasNoInnerStride) > && ( bool(IsDynamicSize) > || HasNoOuterStride >diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h >--- a/Eigen/src/Core/DenseStorage.h >+++ b/Eigen/src/Core/DenseStorage.h >@@ -29,45 +29,44 @@ EIGEN_DEVICE_FUNC > void check_static_allocation_size() > { > // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit > #if EIGEN_STACK_ALLOCATION_LIMIT > EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); > #endif > } > >-template<typename T, int Size, typename Packet = typename packet_traits<T>::type, >- bool Match = bool((Size%unpacket_traits<Packet>::size)==0), >- bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1) >- && bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) > >+template<int ArrayBytes, int AlignmentBytes, >+ bool Match = bool((ArrayBytes%AlignmentBytes)==0), >+ bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) > > struct compute_default_alignment > { > enum { value = 0 }; > }; > >-template<typename T, int Size, typename Packet, bool TryHalf> >-struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match >+template<int ArrayBytes, int AlignmentBytes, bool TryHalf> >+struct 
compute_default_alignment<ArrayBytes, AlignmentBytes, true, TryHalf> // Match > { >- enum { value = sizeof(T) * unpacket_traits<Packet>::size }; >+ enum { value = AlignmentBytes }; > }; > >-template<typename T, int Size, typename Packet> >-struct compute_default_alignment<T, Size, Packet, false, true> // Try-half >+template<int ArrayBytes, int AlignmentBytes> >+struct compute_default_alignment<ArrayBytes, AlignmentBytes, false, true> // Try-half > { > // current packet too large, try with an half-packet >- enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value }; >+ enum { value = compute_default_alignment<ArrayBytes, AlignmentBytes/2>::value }; > }; > > /** \internal > * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: > * to 16 bytes boundary if the total size is a multiple of 16 bytes. > */ > template <typename T, int Size, int MatrixOrArrayOptions, > int Alignment = (MatrixOrArrayOptions&DontAlign) ? 
0 >- : compute_default_alignment<T,Size>::value > >+ : compute_default_alignment<Size*sizeof(T), EIGEN_PLAIN_ENUM_MAX(packet_traits<T>::size*sizeof(T), EIGEN_MAX_STATIC_ALIGN_BYTES) >::value > > struct plain_array > { > T array[Size]; > > EIGEN_DEVICE_FUNC > plain_array() > { > check_static_allocation_size<T,Size>(); >@@ -175,17 +174,17 @@ struct plain_array<T, Size, MatrixOrArra > { > check_static_allocation_size<T,Size>(); > } > }; > > template <typename T, int MatrixOrArrayOptions, int Alignment> > struct plain_array<T, 0, MatrixOrArrayOptions, Alignment> > { >- EIGEN_USER_ALIGN_DEFAULT T array[1]; >+ T array[1]; > EIGEN_DEVICE_FUNC plain_array() {} > EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} > }; > > } // end namespace internal > > /** \internal > * >diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h >--- a/Eigen/src/Core/GeneralProduct.h >+++ b/Eigen/src/Core/GeneralProduct.h >@@ -178,30 +178,30 @@ template<typename Scalar,int Size> > struct gemv_static_vector_if<Scalar,Size,Dynamic,true> > { > EIGEN_STRONG_INLINE Scalar* data() { return 0; } > }; > > template<typename Scalar,int Size,int MaxSize> > struct gemv_static_vector_if<Scalar,Size,MaxSize,true> > { >- #if EIGEN_ALIGN_STATICALLY >+ #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 > internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data; > EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } > #else > // Some architectures cannot align on the stack, > // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. > enum { > ForceAlignment = internal::packet_traits<Scalar>::Vectorizable, > PacketSize = internal::packet_traits<Scalar>::size > }; > internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data; > EIGEN_STRONG_INLINE Scalar* data() { > return ForceAlignment >- ? 
reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES) >+ ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) > : m_data.array; > } > #endif > }; > > // The vector is on the left => transposition > template<int StorageOrder, bool BlasCompatible> > struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible> >diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h >--- a/Eigen/src/Core/Map.h >+++ b/Eigen/src/Core/Map.h >@@ -72,17 +72,17 @@ struct traits<Map<PlainObjectType, MapOp > typedef traits<PlainObjectType> TraitsBase; > enum { > InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 > ? int(PlainObjectType::InnerStrideAtCompileTime) > : int(StrideType::InnerStrideAtCompileTime), > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 > ? int(PlainObjectType::OuterStrideAtCompileTime) > : int(StrideType::OuterStrideAtCompileTime), >- IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), >+ IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), > Flags0 = TraitsBase::Flags & (~NestByRefBit), > Flags = is_lvalue<PlainObjectType>::value ? 
int(Flags0) : (int(Flags0) & ~LvalueBit) > }; > private: > enum { Options }; // Expressions don't have Options > }; > } > >diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h >--- a/Eigen/src/Core/MapBase.h >+++ b/Eigen/src/Core/MapBase.h >@@ -155,17 +155,18 @@ template<typename Derived> class MapBase > checkSanity(); > } > > protected: > > EIGEN_DEVICE_FUNC > void checkSanity() const > { >- eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); >+ // TODO "IsAligned" should be replaced to handle arbitrary alignment >+ eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned"); > } > > PointerType m_data; > const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows; > const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols; > }; > > template<typename Derived> class MapBase<Derived, WriteAccessors> >diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h >--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h >+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h >@@ -288,18 +288,18 @@ class gemm_blocking_space<StorageOrder,_ > typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar; > typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar; > typedef gebp_traits<LhsScalar,RhsScalar> Traits; > enum { > SizeA = ActualRows * MaxDepth, > SizeB = ActualCols * MaxDepth > }; > >- EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA]; >- EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB]; >+ EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA]; >+ EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB]; > > public: > > gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/) > { > this->m_mc = ActualRows; > this->m_nc = ActualCols; > this->m_kc = MaxDepth; >diff 
--git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h >--- a/Eigen/src/Core/products/GeneralMatrixVector.h >+++ b/Eigen/src/Core/products/GeneralMatrixVector.h >@@ -458,17 +458,18 @@ EIGEN_DONT_INLINE void general_matrix_ve > } > > const Index offset1 = (FirstAligned && alignmentStep==1?3:1); > const Index offset3 = (FirstAligned && alignmentStep==1?1:3); > > Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; > for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) > { >- EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0); >+ // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ?? >+ EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); > ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0); > > // this helps the compiler generating good binary code > const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0), > lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0); > > if (Vectorizable) > { >@@ -567,17 +568,17 @@ EIGEN_DONT_INLINE void general_matrix_ve > > // process remaining first and last rows (at most columnsAtOnce-1) > Index end = rows; > Index start = rowBound; > do > { > for (Index i=start; i<end; ++i) > { >- EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0); >+ EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); > ResPacket ptmp0 = pset1<ResPacket>(tmp0); > const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); > // process first unaligned result's coeffs > // FIXME this loop get vectorized by the compiler ! 
> for (Index j=0; j<alignedStart; ++j) > tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); > > if (alignedSize>alignedStart) >diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h >--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h >+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h >@@ -269,17 +269,17 @@ EIGEN_DONT_INLINE void product_triangula > LhsMapper lhs(_lhs,lhsStride); > RhsMapper rhs(_rhs,rhsStride); > ResMapper res(_res, resStride); > > Index kc = blocking.kc(); // cache block size along the K direction > Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction > > std::size_t sizeA = kc*mc; >- std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar); >+ std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar); > > ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); > ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); > > Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer; > triangularBuffer.setZero(); > if((Mode&ZeroDiag)==ZeroDiag) > triangularBuffer.diagonal().setZero(); >@@ -306,17 +306,17 @@ EIGEN_DONT_INLINE void product_triangula > } > > // remaining size > Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2; > // size of the triangular part > Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc; > > Scalar* geb = blockB+ts*ts; >- geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar)); >+ geb = geb + internal::first_aligned(geb,EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar)); > > pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 
0 : k2), actual_kc, rs); > > // pack the triangular part of the rhs padding the unrolled blocks with zeros > if(ts>0) > { > for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth) > { >diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h >--- a/Eigen/src/Core/util/Macros.h >+++ b/Eigen/src/Core/util/Macros.h >@@ -1,12 +1,12 @@ > // This file is part of Eigen, a lightweight C++ template library > // for linear algebra. > // >-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> >+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> > // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> > // > // This Source Code Form is subject to the terms of the Mozilla > // Public License v. 2.0. If a copy of the MPL was not distributed > // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. > > #ifndef EIGEN_MACROS_H > #define EIGEN_MACROS_H >@@ -301,78 +301,20 @@ > > #if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG > // see bug 89 > #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 > #else > #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 > #endif > >-// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable >-// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always >-// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in >-// certain common platform (compiler+architecture combinations) to avoid these problems. >-// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't >-// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even >-// when we have to disable static alignment. 
>-#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) >-#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 >-#else >-#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 >-#endif >- >-// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX >-#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ >- && !EIGEN_GCC3_OR_OLDER \ >- && !EIGEN_COMP_SUNCC \ >- && !EIGEN_OS_QNX >- #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 >-#else >- #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 >-#endif >- >-// Defined the boundary (in bytes) on which the data needs to be aligned. Note >-// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be >-// aligned at all regardless of the value of this #define. >-// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES, >-// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough, >-// and 16 bytes alignment is also enough for Vector4f. >-#define EIGEN_ALIGN_BYTES 16 >- >-#ifdef EIGEN_DONT_ALIGN >- #ifndef EIGEN_DONT_ALIGN_STATICALLY >- #define EIGEN_DONT_ALIGN_STATICALLY >- #endif >- #define EIGEN_ALIGN 0 >-#elif !defined(EIGEN_DONT_VECTORIZE) >- #if defined(__AVX__) >- #undef EIGEN_ALIGN_BYTES >- #define EIGEN_ALIGN_BYTES 32 >- #endif >- #define EIGEN_ALIGN 1 >-#else >- #define EIGEN_ALIGN 0 >-#endif >- >- > // This macro can be used to prevent from macro expansion, e.g.: > // std::max EIGEN_NOT_A_MACRO(a,b) > #define EIGEN_NOT_A_MACRO > >-// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable >-// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used. 
>-#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY) >- #define EIGEN_ALIGN_STATICALLY 1 >-#else >- #define EIGEN_ALIGN_STATICALLY 0 >- #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT >- #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT >- #endif >-#endif >- > #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR > #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor > #else > #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor > #endif > > #ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE > #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t >@@ -580,16 +522,30 @@ namespace Eigen { > #if !defined(EIGEN_ASM_COMMENT) > #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64) > #define EIGEN_ASM_COMMENT(X) __asm__("#" X) > #else > #define EIGEN_ASM_COMMENT(X) > #endif > #endif > >+ >+//------------------------------------------------------------------------------------------ >+// Static and dynamic alignment control >+// >+// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES >+// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. >+// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, >+// a default value is automatically computed based on architecture, compiler, and OS. >+// >+// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} >+// to be used to declare statically aligned buffers. >+//------------------------------------------------------------------------------------------ >+ >+ > /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. > * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, > * so that vectorization doesn't affect binary compatibility. 
> * > * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link > * vectorized and non-vectorized code. > */ > #if (defined __CUDACC__) >@@ -600,33 +556,122 @@ namespace Eigen { > #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) > #elif EIGEN_COMP_SUNCC > // FIXME not sure about this one: > #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) > #else > #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler > #endif > >+// If the user explicitly disables vectorization, then we also disable alignment >+#if defined(EIGEN_DONT_VECTORIZE) >+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 >+#elif defined(__AVX__) >+ // 32 bytes static alignment is preferred only if really required >+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 >+#else >+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 >+#endif >+ >+ >+// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense >+#define EIGEN_MIN_ALIGN_BYTES 16 >+ >+// Defined the boundary (in bytes) on which the data needs to be aligned. Note >+// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be >+// aligned at all regardless of the value of this #define. >+ >+#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 >+#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. 
>+#endif >+ >+// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated >+// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 >+#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) >+ #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES >+ #undef EIGEN_MAX_STATIC_ALIGN_BYTES >+ #endif >+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 >+#endif >+ >+#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES >+ >+ // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES >+ >+ // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable >+ // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always >+ // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in >+ // certain common platform (compiler+architecture combinations) to avoid these problems. >+ // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't >+ // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even >+ // when we have to disable static alignment. 
>+ #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) >+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 >+ #else >+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 >+ #endif >+ >+ // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX >+ #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ >+ && !EIGEN_GCC3_OR_OLDER \ >+ && !EIGEN_COMP_SUNCC \ >+ && !EIGEN_OS_QNX >+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 >+ #else >+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 >+ #endif >+ >+ #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT >+ #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES >+ #endif >+ >+#endif >+ >+// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES >+#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES >+#undef EIGEN_MAX_STATIC_ALIGN_BYTES >+#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES >+#endif >+ >+#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) >+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT >+#endif >+ >+// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not. >+// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES) >+// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). >+// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. 
>+ >+ >+// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY > #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8) > #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16) > #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32) >-#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES) >+#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64) >+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_ALIGN_BYTES) > >-#if EIGEN_ALIGN_STATICALLY >-#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n) >-#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16 >-#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32 >-#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT >-#else >-#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) >-#define EIGEN_USER_ALIGN16 >-#define EIGEN_USER_ALIGN32 >-#define EIGEN_USER_ALIGN_DEFAULT >+ >+// Dynamic alignment control >+ >+#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0 >+#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN. > #endif > >+#ifdef EIGEN_DONT_ALIGN >+ #ifdef EIGEN_MAX_ALIGN_BYTES >+ #undef EIGEN_MAX_ALIGN_BYTES >+ #endif >+ #define EIGEN_MAX_ALIGN_BYTES 0 >+#elif !defined(EIGEN_MAX_ALIGN_BYTES) >+ #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES >+#endif >+ >+//---------------------------------------------------------------------- >+ >+ > #ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD > #define EIGEN_RESTRICT > #endif > #ifndef EIGEN_RESTRICT > #define EIGEN_RESTRICT __restrict > #endif > > #ifndef EIGEN_STACK_ALLOCATION_LIMIT >diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h >--- a/Eigen/src/Core/util/Memory.h >+++ b/Eigen/src/Core/util/Memory.h >@@ -1,12 +1,12 @@ > // This file is part of Eigen, a lightweight C++ template library > // for linear algebra. 
> // >-// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> >+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> > // Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com> > // Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com> > // Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com> > // Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org> > // Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com> > // > // This Source Code Form is subject to the terms of the Mozilla > // Public License v. 2.0. If a copy of the MPL was not distributed >@@ -27,34 +27,34 @@ > // On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see: > // http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html > // This is true at least since glibc 2.8. > // This leaves the question how to detect 64-bit. According to this document, > // http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf > // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed > // quite safe, at least within the context of glibc, to equate 64-bit with LP64. > #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \ >- && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16) >+ && defined(__LP64__) && ! 
defined( __SANITIZE_ADDRESS__ ) && (EIGEN_MAX_ALIGN_BYTES == 16) > #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1 > #else > #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0 > #endif > > // FreeBSD 6 seems to have 16-byte aligned malloc > // See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup > // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures > // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup >-#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16) >+#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_MAX_ALIGN_BYTES == 16) > #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 > #else > #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 > #endif > >-#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \ >- || (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \ >+#if (EIGEN_OS_MAC && (EIGEN_MAX_ALIGN_BYTES == 16)) \ >+ || (EIGEN_OS_WIN64 && (EIGEN_MAX_ALIGN_BYTES == 16)) \ > || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ > || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED > #define EIGEN_MALLOC_ALREADY_ALIGNED 1 > #else > #define EIGEN_MALLOC_ALREADY_ALIGNED 0 > #endif > > #endif >@@ -102,19 +102,19 @@ inline void throw_std_bad_alloc() > > /* ----- Hand made implementations of aligned malloc/free and realloc ----- */ > > /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. > * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. 
> */ > inline void* handmade_aligned_malloc(std::size_t size) > { >- void *original = std::malloc(size+EIGEN_ALIGN_BYTES); >+ void *original = std::malloc(size+EIGEN_MAX_ALIGN_BYTES); > if (original == 0) return 0; >- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES); >+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES); > *(reinterpret_cast<void**>(aligned) - 1) = original; > return aligned; > } > > /** \internal Frees memory allocated with handmade_aligned_malloc */ > inline void handmade_aligned_free(void *ptr) > { > if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1)); >@@ -125,19 +125,19 @@ inline void handmade_aligned_free(void * > * Since we know that our handmade version is based on std::realloc > * we can use std::realloc to implement efficient reallocation. > */ > inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) > { > if (ptr == 0) return handmade_aligned_malloc(size); > void *original = *(reinterpret_cast<void**>(ptr) - 1); > std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original); >- original = std::realloc(original,size+EIGEN_ALIGN_BYTES); >+ original = std::realloc(original,size+EIGEN_MAX_ALIGN_BYTES); > if (original == 0) return 0; >- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES); >+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES); > void *previous_aligned = static_cast<char *>(original)+previous_offset; > if(aligned!=previous_aligned) > std::memmove(aligned, previous_aligned, size); > > *(reinterpret_cast<void**>(aligned) - 1) = original; > return aligned; > } > >@@ -213,40 +213,40 @@ 
EIGEN_DEVICE_FUNC inline void check_that > /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements. > * On allocation error, the returned pointer is null, and std::bad_alloc is thrown. > */ > EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) > { > check_that_malloc_is_allowed(); > > void *result; >- #if !EIGEN_ALIGN >+ #if EIGEN_MAX_ALIGN_BYTES==0 > result = std::malloc(size); > #elif EIGEN_MALLOC_ALREADY_ALIGNED > result = std::malloc(size); > #elif EIGEN_HAS_POSIX_MEMALIGN >- if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0; >+ if(posix_memalign(&result, EIGEN_MAX_ALIGN_BYTES, size)) result = 0; > #elif EIGEN_HAS_MM_MALLOC >- result = _mm_malloc(size, EIGEN_ALIGN_BYTES); >+ result = _mm_malloc(size, EIGEN_MAX_ALIGN_BYTES); > #elif EIGEN_OS_WIN_STRICT >- result = _aligned_malloc(size, EIGEN_ALIGN_BYTES); >+ result = _aligned_malloc(size, EIGEN_MAX_ALIGN_BYTES); > #else > result = handmade_aligned_malloc(size); > #endif > > if(!result && size) > throw_std_bad_alloc(); > > return result; > } > > /** \internal Frees memory allocated with aligned_malloc. */ > EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) > { >- #if !EIGEN_ALIGN >+ #if EIGEN_MAX_ALIGN_BYTES==0 > std::free(ptr); > #elif EIGEN_MALLOC_ALREADY_ALIGNED > std::free(ptr); > #elif EIGEN_HAS_POSIX_MEMALIGN > std::free(ptr); > #elif EIGEN_HAS_MM_MALLOC > _mm_free(ptr); > #elif EIGEN_OS_WIN_STRICT >@@ -261,33 +261,33 @@ EIGEN_DEVICE_FUNC inline void aligned_fr > * \brief Reallocates an aligned block of memory. 
> * \throws std::bad_alloc on allocation failure > **/ > inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) > { > EIGEN_UNUSED_VARIABLE(old_size); > > void *result; >-#if !EIGEN_ALIGN >+#if EIGEN_MAX_ALIGN_BYTES==0 > result = std::realloc(ptr,new_size); > #elif EIGEN_MALLOC_ALREADY_ALIGNED > result = std::realloc(ptr,new_size); > #elif EIGEN_HAS_POSIX_MEMALIGN > result = generic_aligned_realloc(ptr,new_size,old_size); > #elif EIGEN_HAS_MM_MALLOC > // The defined(_mm_free) is just here to verify that this MSVC version > // implements _mm_malloc/_mm_free based on the corresponding _aligned_ > // functions. This may not always be the case and we just try to be safe. > #if EIGEN_OS_WIN_STRICT && defined(_mm_free) >- result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES); >+ result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES); > #else > result = generic_aligned_realloc(ptr,new_size,old_size); > #endif > #elif EIGEN_OS_WIN_STRICT >- result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES); >+ result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES); > #else > result = handmade_aligned_realloc(ptr,new_size,old_size); > #endif > > if (!result && new_size) > throw_std_bad_alloc(); > > return result; >@@ -686,17 +686,17 @@ template<typename T> void swap(scoped_ar > * // use data[0] to data[size-1] > * } > * \endcode > * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. > */ > #ifdef EIGEN_ALLOCA > // We always manually re-align the result of EIGEN_ALLOCA. > // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. 
>- #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1))) >+ #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_MAX_ALIGN_BYTES-1)) + EIGEN_MAX_ALIGN_BYTES-1) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) > > #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ > Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ > TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \ > : reinterpret_cast<TYPE*>( \ > (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \ > : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \ > Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) >@@ -710,17 +710,17 @@ template<typename T> void swap(scoped_ar > > #endif > > > /***************************************************************************** > *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** > *****************************************************************************/ > >-#if EIGEN_ALIGN >+#if EIGEN_MAX_ALIGN_BYTES!=0 > #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ > void* operator new(size_t size, const std::nothrow_t&) throw() { \ > EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ > EIGEN_CATCH (...) 
{ return 0; } \ > } > #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ > void *operator new(size_t size) { \ > return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ >@@ -746,17 +746,17 @@ template<typename T> void swap(scoped_ar > } \ > typedef void eigen_aligned_operator_new_marker_type; > #else > #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) > #endif > > #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) > #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ >- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0))) >+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) > > /****************************************************************************/ > > /** \class aligned_allocator > * \ingroup Core_Module > * > * \brief STL compatible allocator to use with with 16 byte aligned types > * >diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h >--- a/Eigen/src/Core/util/XprHelper.h >+++ b/Eigen/src/Core/util/XprHelper.h >@@ -162,25 +162,25 @@ class compute_matrix_evaluator_flags > > // TODO: should check for smaller packet types once we can handle multi-sized packet types > align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), > > aligned_bit = > ( > ((Options&DontAlign)==0) > && ( >-#if EIGEN_ALIGN_STATICALLY >+#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 > ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) > #else > 0 > #endif > > || > >-#if EIGEN_ALIGN >+#if EIGEN_MAX_ALIGN_BYTES!=0 > is_dynamic_size_storage > #else > 0 > #endif > > ) > ) ? AlignedBit : 0, > packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? 
PacketAccessBit : 0 >diff --git a/test/sizeof.cpp b/test/sizeof.cpp >--- a/test/sizeof.cpp >+++ b/test/sizeof.cpp >@@ -16,16 +16,27 @@ template<typename MatrixType> void verif > VERIFY_IS_EQUAL(std::ptrdiff_t(sizeof(MatrixType)),std::ptrdiff_t(sizeof(Scalar))*std::ptrdiff_t(MatrixType::SizeAtCompileTime)); > else > VERIFY_IS_EQUAL(sizeof(MatrixType),sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index)); > } > > void test_sizeof() > { > CALL_SUBTEST(verifySizeOf(Matrix<float, 1, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 2, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 3, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 4, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 5, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 6, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 7, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 8, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 9, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 10, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 11, 1>()) ); >+ CALL_SUBTEST(verifySizeOf(Array<float, 12, 1>()) ); > CALL_SUBTEST(verifySizeOf(Vector2d()) ); > CALL_SUBTEST(verifySizeOf(Vector4f()) ); > CALL_SUBTEST(verifySizeOf(Matrix4d()) ); > CALL_SUBTEST(verifySizeOf(Matrix<double, 4, 2>()) ); > CALL_SUBTEST(verifySizeOf(Matrix<bool, 7, 5>()) ); > CALL_SUBTEST(verifySizeOf(MatrixXcf(3, 3)) ); > CALL_SUBTEST(verifySizeOf(MatrixXi(8, 12)) ); > CALL_SUBTEST(verifySizeOf(MatrixXcd(20, 20)) ); >diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp >--- a/test/unalignedassert.cpp >+++ b/test/unalignedassert.cpp >@@ -1,17 +1,31 @@ > // This file is part of Eigen, a lightweight C++ template library > // for linear algebra. > // > // Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com> >+// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr> > // > // This Source Code Form is subject to the terms of the Mozilla > // Public License v. 2.0. 
If a copy of the MPL was not distributed > // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. > >+#if defined(EIGEN_TEST_PART_1) >+ // default >+#elif defined(EIGEN_TEST_PART_2) >+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 16 >+ #define EIGEN_MAX_ALIGN_BYTES 16 >+#elif defined(EIGEN_TEST_PART_3) >+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 32 >+ #define EIGEN_MAX_ALIGN_BYTES 32 >+#elif defined(EIGEN_TEST_PART_4) >+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 64 >+ #define EIGEN_MAX_ALIGN_BYTES 64 >+#endif >+ > #include "main.h" > > typedef Matrix<float, 6,1> Vector6f; > typedef Matrix<float, 8,1> Vector8f; > typedef Matrix<float, 12,1> Vector12f; > > typedef Matrix<double, 5,1> Vector5d; > typedef Matrix<double, 6,1> Vector6d; >@@ -43,17 +57,17 @@ struct TestNew4 > EIGEN_MAKE_ALIGNED_OPERATOR_NEW > Vector2d m; > float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects > }; > > struct TestNew5 > { > EIGEN_MAKE_ALIGNED_OPERATOR_NEW >- float f; // try the f at first -- the EIGEN_ALIGN16 attribute of m should make that still work >+ float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work > Matrix4f m; > }; > > struct TestNew6 > { > Matrix<float,2,2,DontAlign> m; // good: no alignment requested > float f; > }; >@@ -70,88 +84,88 @@ void check_unalignedassert_good() > { > T *x, *y; > x = new T; > delete x; > y = new T[2]; > delete[] y; > } > >-#if EIGEN_ALIGN_STATICALLY >+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 > template<typename T> > void construct_at_boundary(int boundary) > { > char buf[sizeof(T)+256]; > size_t _buf = reinterpret_cast<size_t>(buf); >- _buf += (EIGEN_ALIGN_BYTES - (_buf % EIGEN_ALIGN_BYTES)); // make 16/32-byte aligned >+ _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned > _buf += boundary; // make exact boundary-aligned > T *x = ::new(reinterpret_cast<void*>(_buf)) T; > x[0].setZero(); // just in order to 
silence warnings > x->~T(); > } > #endif > > void unalignedassert() > { >-#if EIGEN_ALIGN_STATICALLY >+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 > construct_at_boundary<Vector2f>(4); > construct_at_boundary<Vector3f>(4); > construct_at_boundary<Vector4f>(16); > construct_at_boundary<Vector6f>(4); >- construct_at_boundary<Vector8f>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Vector8f>(EIGEN_MAX_ALIGN_BYTES); > construct_at_boundary<Vector12f>(16); > construct_at_boundary<Matrix2f>(16); > construct_at_boundary<Matrix3f>(4); >- construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Matrix4f>(EIGEN_MAX_ALIGN_BYTES); > > construct_at_boundary<Vector2d>(16); > construct_at_boundary<Vector3d>(4); >- construct_at_boundary<Vector4d>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Vector4d>(EIGEN_MAX_ALIGN_BYTES); > construct_at_boundary<Vector5d>(4); > construct_at_boundary<Vector6d>(16); > construct_at_boundary<Vector7d>(4); >- construct_at_boundary<Vector8d>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Vector8d>(EIGEN_MAX_ALIGN_BYTES); > construct_at_boundary<Vector9d>(4); > construct_at_boundary<Vector10d>(16); >- construct_at_boundary<Vector12d>(EIGEN_ALIGN_BYTES); >- construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Vector12d>(EIGEN_MAX_ALIGN_BYTES); >+ construct_at_boundary<Matrix2d>(EIGEN_MAX_ALIGN_BYTES); > construct_at_boundary<Matrix3d>(4); >- construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Matrix4d>(EIGEN_MAX_ALIGN_BYTES); > > construct_at_boundary<Vector2cf>(16); > construct_at_boundary<Vector3cf>(4); >- construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES); >+ construct_at_boundary<Vector2cd>(EIGEN_MAX_ALIGN_BYTES); > construct_at_boundary<Vector3cd>(16); > #endif > > check_unalignedassert_good<TestNew1>(); > check_unalignedassert_good<TestNew2>(); > check_unalignedassert_good<TestNew3>(); > > check_unalignedassert_good<TestNew4>(); > check_unalignedassert_good<TestNew5>(); > 
check_unalignedassert_good<TestNew6>(); > check_unalignedassert_good<Depends<true> >(); > >-#if EIGEN_ALIGN_STATICALLY >- if(EIGEN_ALIGN_BYTES>=16) >+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 >+ if(EIGEN_MAX_ALIGN_BYTES>=16) > { > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12f>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector6d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector10d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12d>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8)); > } >- for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8) >+ for(int b=8; b<EIGEN_MAX_ALIGN_BYTES; b+=8) > { > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b)); > VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cd>(b)); > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 973
:
554
|
593