This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 973 | Differences between
and this patch

Collapse All | Expand All

(-)a/Eigen/Core (-3 / +3 lines)
Lines 68-86 Link Here
68
#endif
68
#endif
69
69
70
#include <complex>
70
#include <complex>
71
71
72
// this include file manages BLAS and MKL related macros
72
// this include file manages BLAS and MKL related macros
73
// and inclusion of their respective header files
73
// and inclusion of their respective header files
74
#include "src/Core/util/MKL_support.h"
74
#include "src/Core/util/MKL_support.h"
75
75
76
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
76
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
77
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
77
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
78
#if !EIGEN_ALIGN
78
#if EIGEN_MAX_ALIGN_BYTES==0
79
  #ifndef EIGEN_DONT_VECTORIZE
79
  #ifndef EIGEN_DONT_VECTORIZE
80
    #define EIGEN_DONT_VECTORIZE
80
    #define EIGEN_DONT_VECTORIZE
81
  #endif
81
  #endif
82
#endif
82
#endif
83
83
84
#if EIGEN_COMP_MSVC
84
#if EIGEN_COMP_MSVC
85
  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
85
  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
86
  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
86
  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
(-)a/Eigen/src/Core/CoreEvaluators.h (-1 / +1 lines)
Lines 636-652 struct evaluator<Map<PlainObjectType, Ma Link Here
636
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
636
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
637
                             : int(StrideType::InnerStrideAtCompileTime),
637
                             : int(StrideType::InnerStrideAtCompileTime),
638
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
638
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
639
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
639
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
640
                             : int(StrideType::OuterStrideAtCompileTime),
640
                             : int(StrideType::OuterStrideAtCompileTime),
641
    HasNoInnerStride = InnerStrideAtCompileTime == 1,
641
    HasNoInnerStride = InnerStrideAtCompileTime == 1,
642
    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
642
    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
643
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
643
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
644
    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
644
    IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
645
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
645
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
646
    
646
    
647
    // TODO: should check for smaller packet types once we can handle multi-sized packet types
647
    // TODO: should check for smaller packet types once we can handle multi-sized packet types
648
    AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
648
    AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
649
    
649
    
650
    KeepsPacketAccess = bool(HasNoInnerStride)
650
    KeepsPacketAccess = bool(HasNoInnerStride)
651
                        && ( bool(IsDynamicSize)
651
                        && ( bool(IsDynamicSize)
652
                           || HasNoOuterStride
652
                           || HasNoOuterStride
(-)a/Eigen/src/Core/DenseStorage.h (-12 / +11 lines)
Lines 29-73 EIGEN_DEVICE_FUNC Link Here
29
void check_static_allocation_size()
29
void check_static_allocation_size()
30
{
30
{
31
  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
31
  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
32
  #if EIGEN_STACK_ALLOCATION_LIMIT
32
  #if EIGEN_STACK_ALLOCATION_LIMIT
33
  EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
33
  EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
34
  #endif
34
  #endif
35
}
35
}
36
36
37
template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
37
template<int ArrayBytes, int AlignmentBytes,
38
         bool Match     =  bool((Size%unpacket_traits<Packet>::size)==0),
38
         bool Match     =  bool((ArrayBytes%AlignmentBytes)==0),
39
         bool TryHalf   =  bool(int(unpacket_traits<Packet>::size) > 1)
39
         bool TryHalf   =  bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
40
                        && bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
41
struct compute_default_alignment
40
struct compute_default_alignment
42
{
41
{
43
  enum { value = 0 };
42
  enum { value = 0 };
44
};
43
};
45
44
46
template<typename T, int Size, typename Packet, bool TryHalf>
45
template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
47
struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match
46
struct compute_default_alignment<ArrayBytes, AlignmentBytes, true, TryHalf> // Match
48
{
47
{
49
  enum { value = sizeof(T) * unpacket_traits<Packet>::size };
48
  enum { value = AlignmentBytes };
50
};
49
};
51
50
52
template<typename T, int Size, typename Packet>
51
template<int ArrayBytes, int AlignmentBytes>
53
struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
52
struct compute_default_alignment<ArrayBytes, AlignmentBytes, false, true> // Try-half
54
{
53
{
55
  // current packet too large, try with an half-packet
54
  // current packet too large, try with an half-packet
56
  enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
55
  enum { value = compute_default_alignment<ArrayBytes, AlignmentBytes/2>::value };
57
};
56
};
58
57
59
/** \internal
58
/** \internal
60
  * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
59
  * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
61
  * to 16 bytes boundary if the total size is a multiple of 16 bytes.
60
  * to 16 bytes boundary if the total size is a multiple of 16 bytes.
62
  */
61
  */
63
template <typename T, int Size, int MatrixOrArrayOptions,
62
template <typename T, int Size, int MatrixOrArrayOptions,
64
          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
63
          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
65
                        : compute_default_alignment<T,Size>::value >
64
                        : compute_default_alignment<Size*sizeof(T), EIGEN_PLAIN_ENUM_MAX(packet_traits<T>::size*sizeof(T), EIGEN_MAX_STATIC_ALIGN_BYTES) >::value >
66
struct plain_array
65
struct plain_array
67
{
66
{
68
  T array[Size];
67
  T array[Size];
69
68
70
  EIGEN_DEVICE_FUNC
69
  EIGEN_DEVICE_FUNC
71
  plain_array()
70
  plain_array()
72
  { 
71
  { 
73
    check_static_allocation_size<T,Size>();
72
    check_static_allocation_size<T,Size>();
Lines 175-191 struct plain_array<T, Size, MatrixOrArra Link Here
175
  { 
174
  { 
176
    check_static_allocation_size<T,Size>();
175
    check_static_allocation_size<T,Size>();
177
  }
176
  }
178
};
177
};
179
178
180
template <typename T, int MatrixOrArrayOptions, int Alignment>
179
template <typename T, int MatrixOrArrayOptions, int Alignment>
181
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
180
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
182
{
181
{
183
  EIGEN_USER_ALIGN_DEFAULT T array[1];
182
  T array[1];
184
  EIGEN_DEVICE_FUNC plain_array() {}
183
  EIGEN_DEVICE_FUNC plain_array() {}
185
  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
184
  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
186
};
185
};
187
186
188
} // end namespace internal
187
} // end namespace internal
189
188
190
/** \internal
189
/** \internal
191
  *
190
  *
(-)a/Eigen/src/Core/GeneralProduct.h (-2 / +2 lines)
Lines 178-207 template<typename Scalar,int Size> Link Here
178
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
178
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
179
{
179
{
180
  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
180
  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
181
};
181
};
182
182
183
template<typename Scalar,int Size,int MaxSize>
183
template<typename Scalar,int Size,int MaxSize>
184
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
184
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
185
{
185
{
186
  #if EIGEN_ALIGN_STATICALLY
186
  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
187
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
187
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
188
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
188
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
189
  #else
189
  #else
190
  // Some architectures cannot align on the stack,
190
  // Some architectures cannot align on the stack,
191
  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
191
  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
192
  enum {
192
  enum {
193
    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
193
    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
194
    PacketSize      = internal::packet_traits<Scalar>::size
194
    PacketSize      = internal::packet_traits<Scalar>::size
195
  };
195
  };
196
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
196
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
197
  EIGEN_STRONG_INLINE Scalar* data() {
197
  EIGEN_STRONG_INLINE Scalar* data() {
198
    return ForceAlignment
198
    return ForceAlignment
199
            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
199
            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
200
            : m_data.array;
200
            : m_data.array;
201
  }
201
  }
202
  #endif
202
  #endif
203
};
203
};
204
204
205
// The vector is on the left => transposition
205
// The vector is on the left => transposition
206
template<int StorageOrder, bool BlasCompatible>
206
template<int StorageOrder, bool BlasCompatible>
207
struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible>
207
struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible>
(-)a/Eigen/src/Core/Map.h (-1 / +1 lines)
Lines 72-88 struct traits<Map<PlainObjectType, MapOp Link Here
72
  typedef traits<PlainObjectType> TraitsBase;
72
  typedef traits<PlainObjectType> TraitsBase;
73
  enum {
73
  enum {
74
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
74
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
75
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
75
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
76
                             : int(StrideType::InnerStrideAtCompileTime),
76
                             : int(StrideType::InnerStrideAtCompileTime),
77
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
77
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
78
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
78
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
79
                             : int(StrideType::OuterStrideAtCompileTime),
79
                             : int(StrideType::OuterStrideAtCompileTime),
80
    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
80
    IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
81
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
81
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
82
    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
82
    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
83
  };
83
  };
84
private:
84
private:
85
  enum { Options }; // Expressions don't have Options
85
  enum { Options }; // Expressions don't have Options
86
};
86
};
87
}
87
}
88
88
(-)a/Eigen/src/Core/MapBase.h (-1 / +2 lines)
Lines 155-171 template<typename Derived> class MapBase Link Here
155
      checkSanity();
155
      checkSanity();
156
    }
156
    }
157
157
158
  protected:
158
  protected:
159
159
160
    EIGEN_DEVICE_FUNC
160
    EIGEN_DEVICE_FUNC
161
    void checkSanity() const
161
    void checkSanity() const
162
    {
162
    {
163
      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
163
      // TODO "IsAligned" should be replaced to handle arbitrary alignment
164
      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned");
164
    }
165
    }
165
166
166
    PointerType m_data;
167
    PointerType m_data;
167
    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
168
    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
168
    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
169
    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
169
};
170
};
170
171
171
template<typename Derived> class MapBase<Derived, WriteAccessors>
172
template<typename Derived> class MapBase<Derived, WriteAccessors>
(-)a/Eigen/src/Core/products/GeneralMatrixMatrix.h (-2 / +2 lines)
Lines 288-305 class gemm_blocking_space<StorageOrder,_ Link Here
288
    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
288
    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
289
    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
289
    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
290
    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
290
    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
291
    enum {
291
    enum {
292
      SizeA = ActualRows * MaxDepth,
292
      SizeA = ActualRows * MaxDepth,
293
      SizeB = ActualCols * MaxDepth
293
      SizeB = ActualCols * MaxDepth
294
    };
294
    };
295
295
296
    EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
296
    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
297
    EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
297
    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
298
298
299
  public:
299
  public:
300
300
301
    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
301
    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
302
    {
302
    {
303
      this->m_mc = ActualRows;
303
      this->m_mc = ActualRows;
304
      this->m_nc = ActualCols;
304
      this->m_nc = ActualCols;
305
      this->m_kc = MaxDepth;
305
      this->m_kc = MaxDepth;
(-)a/Eigen/src/Core/products/GeneralMatrixVector.h (-2 / +3 lines)
Lines 458-474 EIGEN_DONT_INLINE void general_matrix_ve Link Here
458
  }
458
  }
459
459
460
  const Index offset1 = (FirstAligned && alignmentStep==1?3:1);
460
  const Index offset1 = (FirstAligned && alignmentStep==1?3:1);
461
  const Index offset3 = (FirstAligned && alignmentStep==1?1:3);
461
  const Index offset3 = (FirstAligned && alignmentStep==1?1:3);
462
462
463
  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
463
  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
464
  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
464
  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
465
  {
465
  {
466
    EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
466
    // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
467
    EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
467
    ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
468
    ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
468
469
469
    // this helps the compiler generating good binary code
470
    // this helps the compiler generating good binary code
470
    const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0),    lhs1 = lhs.getVectorMapper(i+offset1, 0),
471
    const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0),    lhs1 = lhs.getVectorMapper(i+offset1, 0),
471
                     lhs2 = lhs.getVectorMapper(i+2, 0),    lhs3 = lhs.getVectorMapper(i+offset3, 0);
472
                     lhs2 = lhs.getVectorMapper(i+2, 0),    lhs3 = lhs.getVectorMapper(i+offset3, 0);
472
473
473
    if (Vectorizable)
474
    if (Vectorizable)
474
    {
475
    {
Lines 567-583 EIGEN_DONT_INLINE void general_matrix_ve Link Here
567
568
568
  // process remaining first and last rows (at most columnsAtOnce-1)
569
  // process remaining first and last rows (at most columnsAtOnce-1)
569
  Index end = rows;
570
  Index end = rows;
570
  Index start = rowBound;
571
  Index start = rowBound;
571
  do
572
  do
572
  {
573
  {
573
    for (Index i=start; i<end; ++i)
574
    for (Index i=start; i<end; ++i)
574
    {
575
    {
575
      EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
576
      EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
576
      ResPacket ptmp0 = pset1<ResPacket>(tmp0);
577
      ResPacket ptmp0 = pset1<ResPacket>(tmp0);
577
      const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
578
      const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
578
      // process first unaligned result's coeffs
579
      // process first unaligned result's coeffs
579
      // FIXME this loop get vectorized by the compiler !
580
      // FIXME this loop get vectorized by the compiler !
580
      for (Index j=0; j<alignedStart; ++j)
581
      for (Index j=0; j<alignedStart; ++j)
581
        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
582
        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
582
583
583
      if (alignedSize>alignedStart)
584
      if (alignedSize>alignedStart)
(-)a/Eigen/src/Core/products/TriangularMatrixMatrix.h (-2 / +2 lines)
Lines 269-285 EIGEN_DONT_INLINE void product_triangula Link Here
269
    LhsMapper lhs(_lhs,lhsStride);
269
    LhsMapper lhs(_lhs,lhsStride);
270
    RhsMapper rhs(_rhs,rhsStride);
270
    RhsMapper rhs(_rhs,rhsStride);
271
    ResMapper res(_res, resStride);
271
    ResMapper res(_res, resStride);
272
272
273
    Index kc = blocking.kc();                   // cache block size along the K direction
273
    Index kc = blocking.kc();                   // cache block size along the K direction
274
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
274
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
275
275
276
    std::size_t sizeA = kc*mc;
276
    std::size_t sizeA = kc*mc;
277
    std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
277
    std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
278
278
279
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
279
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
280
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
280
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
281
281
282
    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
282
    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
283
    triangularBuffer.setZero();
283
    triangularBuffer.setZero();
284
    if((Mode&ZeroDiag)==ZeroDiag)
284
    if((Mode&ZeroDiag)==ZeroDiag)
285
      triangularBuffer.diagonal().setZero();
285
      triangularBuffer.diagonal().setZero();
Lines 306-322 EIGEN_DONT_INLINE void product_triangula Link Here
306
      }
306
      }
307
307
308
      // remaining size
308
      // remaining size
309
      Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
309
      Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
310
      // size of the triangular part
310
      // size of the triangular part
311
      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
311
      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
312
312
313
      Scalar* geb = blockB+ts*ts;
313
      Scalar* geb = blockB+ts*ts;
314
      geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
314
      geb = geb + internal::first_aligned(geb,EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar));
315
315
316
      pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
316
      pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
317
317
318
      // pack the triangular part of the rhs padding the unrolled blocks with zeros
318
      // pack the triangular part of the rhs padding the unrolled blocks with zeros
319
      if(ts>0)
319
      if(ts>0)
320
      {
320
      {
321
        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
321
        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
322
        {
322
        {
(-)a/Eigen/src/Core/util/Macros.h (-70 / +115 lines)
Lines 1-12 Link Here
1
// This file is part of Eigen, a lightweight C++ template library
1
// This file is part of Eigen, a lightweight C++ template library
2
// for linear algebra.
2
// for linear algebra.
3
//
3
//
4
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
4
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6
//
6
//
7
// This Source Code Form is subject to the terms of the Mozilla
7
// This Source Code Form is subject to the terms of the Mozilla
8
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// Public License v. 2.0. If a copy of the MPL was not distributed
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
10
11
#ifndef EIGEN_MACROS_H
11
#ifndef EIGEN_MACROS_H
12
#define EIGEN_MACROS_H
12
#define EIGEN_MACROS_H
Lines 301-378 Link Here
301
301
302
#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
302
#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
303
  // see bug 89
303
  // see bug 89
304
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
304
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
305
#else
305
#else
306
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
306
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
307
#endif
307
#endif
308
308
309
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
310
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
311
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
312
// certain common platform (compiler+architecture combinations) to avoid these problems.
313
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
314
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
315
// when we have to disable static alignment.
316
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
317
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
318
#else
319
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
320
#endif
321
322
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
323
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
324
 && !EIGEN_GCC3_OR_OLDER \
325
 && !EIGEN_COMP_SUNCC \
326
 && !EIGEN_OS_QNX
327
  #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
328
#else
329
  #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
330
#endif
331
332
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
333
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
334
// aligned at all regardless of the value of this #define.
335
// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
336
//      for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough,
337
//      and 16 bytes alignment is also enough for Vector4f.
338
#define EIGEN_ALIGN_BYTES 16
339
340
#ifdef EIGEN_DONT_ALIGN
341
  #ifndef EIGEN_DONT_ALIGN_STATICALLY
342
    #define EIGEN_DONT_ALIGN_STATICALLY
343
  #endif
344
  #define EIGEN_ALIGN 0
345
#elif !defined(EIGEN_DONT_VECTORIZE)
346
  #if defined(__AVX__)
347
    #undef EIGEN_ALIGN_BYTES
348
    #define EIGEN_ALIGN_BYTES 32
349
  #endif
350
  #define EIGEN_ALIGN 1
351
#else
352
  #define EIGEN_ALIGN 0
353
#endif
354
355
356
// This macro can be used to prevent from macro expansion, e.g.:
309
// This macro can be used to prevent from macro expansion, e.g.:
357
//   std::max EIGEN_NOT_A_MACRO(a,b)
310
//   std::max EIGEN_NOT_A_MACRO(a,b)
358
#define EIGEN_NOT_A_MACRO
311
#define EIGEN_NOT_A_MACRO
359
312
360
// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
361
// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
362
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
363
  #define EIGEN_ALIGN_STATICALLY 1
364
#else
365
  #define EIGEN_ALIGN_STATICALLY 0
366
  #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
367
    #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
368
  #endif
369
#endif
370
371
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
313
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
372
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
314
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
373
#else
315
#else
374
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
316
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
375
#endif
317
#endif
376
318
377
#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
319
#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
378
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
320
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
Lines 580-595 namespace Eigen { Link Here
580
#if !defined(EIGEN_ASM_COMMENT)
522
#if !defined(EIGEN_ASM_COMMENT)
581
  #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
523
  #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
582
    #define EIGEN_ASM_COMMENT(X)  __asm__("#" X)
524
    #define EIGEN_ASM_COMMENT(X)  __asm__("#" X)
583
  #else
525
  #else
584
    #define EIGEN_ASM_COMMENT(X)
526
    #define EIGEN_ASM_COMMENT(X)
585
  #endif
527
  #endif
586
#endif
528
#endif
587
529
530
531
//------------------------------------------------------------------------------------------
532
// Static and dynamic alignment control
533
// 
534
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
535
// as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
536
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
537
// a default value is automatically computed based on architecture, compiler, and OS.
538
// 
539
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
540
// to be used to declare statically aligned buffers.
541
//------------------------------------------------------------------------------------------
542
543
588
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
544
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
589
 * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
545
 * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
590
 * so that vectorization doesn't affect binary compatibility.
546
 * so that vectorization doesn't affect binary compatibility.
591
 *
547
 *
592
 * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
548
 * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
593
 * vectorized and non-vectorized code.
549
 * vectorized and non-vectorized code.
594
 */
550
 */
595
#if (defined __CUDACC__)
551
#if (defined __CUDACC__)
Lines 600-632 namespace Eigen { Link Here
600
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
556
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
601
#elif EIGEN_COMP_SUNCC
557
#elif EIGEN_COMP_SUNCC
602
  // FIXME not sure about this one:
558
  // FIXME not sure about this one:
603
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
559
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
604
#else
560
#else
605
  #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
561
  #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
606
#endif
562
#endif
607
563
564
// If the user explicitly disable vectorization, then we also disable alignment
565
#if defined(EIGEN_DONT_VECTORIZE)
566
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
567
#elif defined(__AVX__)
568
  // 32 bytes static alignment is preferred only if really required
569
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
570
#else
571
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
572
#endif
573
574
575
// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
576
#define EIGEN_MIN_ALIGN_BYTES 16
577
578
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
579
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
580
// aligned at all regardless of the value of this #define.
581
582
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
583
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
584
#endif
585
586
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
587
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
588
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
589
  #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
590
    #undef EIGEN_MAX_STATIC_ALIGN_BYTES
591
  #endif
592
  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
593
#endif
594
595
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
596
597
  // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
598
  
599
  // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
600
  // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
601
  // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
602
  // certain common platform (compiler+architecture combinations) to avoid these problems.
603
  // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
604
  // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
605
  // when we have to disable static alignment.
606
  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
607
  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
608
  #else
609
  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
610
  #endif
611
612
  // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
613
  #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
614
  && !EIGEN_GCC3_OR_OLDER \
615
  && !EIGEN_COMP_SUNCC \
616
  && !EIGEN_OS_QNX
617
    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
618
  #else
619
    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
620
  #endif
621
  
622
  #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
623
    #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
624
  #endif
625
  
626
#endif
627
628
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
629
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
630
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
631
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
632
#endif
633
634
#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
635
  #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
636
#endif
637
638
// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
639
// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES)
640
// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
641
// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
642
643
644
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
608
#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
645
#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
609
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
646
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
610
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
647
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
611
#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
648
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
649
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_ALIGN_BYTES)
612
650
613
#if EIGEN_ALIGN_STATICALLY
651
614
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
652
// Dynamic alignment control
615
#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
653
616
#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
654
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
617
#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
655
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
618
#else
619
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
620
#define EIGEN_USER_ALIGN16
621
#define EIGEN_USER_ALIGN32
622
#define EIGEN_USER_ALIGN_DEFAULT
623
#endif
656
#endif
624
657
658
#ifdef EIGEN_DONT_ALIGN
659
  #ifdef EIGEN_MAX_ALIGN_BYTES
660
    #undef EIGEN_MAX_ALIGN_BYTES
661
  #endif
662
  #define EIGEN_MAX_ALIGN_BYTES 0
663
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
664
  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
665
#endif
666
667
//----------------------------------------------------------------------
668
669
625
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
670
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
626
  #define EIGEN_RESTRICT
671
  #define EIGEN_RESTRICT
627
#endif
672
#endif
628
#ifndef EIGEN_RESTRICT
673
#ifndef EIGEN_RESTRICT
629
  #define EIGEN_RESTRICT __restrict
674
  #define EIGEN_RESTRICT __restrict
630
#endif
675
#endif
631
676
632
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
677
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
(-)a/Eigen/src/Core/util/Memory.h (-20 / +20 lines)
Lines 1-12 Link Here
1
// This file is part of Eigen, a lightweight C++ template library
1
// This file is part of Eigen, a lightweight C++ template library
2
// for linear algebra.
2
// for linear algebra.
3
//
3
//
4
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
4
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
5
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
6
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
6
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
7
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
7
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
8
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
8
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
9
// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
9
// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
10
//
10
//
11
// This Source Code Form is subject to the terms of the Mozilla
11
// This Source Code Form is subject to the terms of the Mozilla
12
// Public License v. 2.0. If a copy of the MPL was not distributed
12
// Public License v. 2.0. If a copy of the MPL was not distributed
Lines 27-60 Link Here
27
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
27
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
28
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
28
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
29
// This is true at least since glibc 2.8.
29
// This is true at least since glibc 2.8.
30
// This leaves the question how to detect 64-bit. According to this document,
30
// This leaves the question how to detect 64-bit. According to this document,
31
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
31
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
32
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
32
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
33
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
33
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
34
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
34
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
35
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16)
35
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_MAX_ALIGN_BYTES == 16)
36
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
36
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
37
#else
37
#else
38
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
38
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
39
#endif
39
#endif
40
40
41
// FreeBSD 6 seems to have 16-byte aligned malloc
41
// FreeBSD 6 seems to have 16-byte aligned malloc
42
//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
42
//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
43
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
43
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
44
//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
44
//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
45
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16)
45
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_MAX_ALIGN_BYTES == 16)
46
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
46
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
47
#else
47
#else
48
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
48
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
49
#endif
49
#endif
50
50
51
#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16))     \
51
#if (EIGEN_OS_MAC && (EIGEN_MAX_ALIGN_BYTES == 16))     \
52
 || (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16))   \
52
 || (EIGEN_OS_WIN64 && (EIGEN_MAX_ALIGN_BYTES == 16))   \
53
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED              \
53
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED              \
54
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
54
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
55
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
55
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
56
#else
56
#else
57
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
57
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
58
#endif
58
#endif
59
59
60
#endif
60
#endif
Lines 102-120 inline void throw_std_bad_alloc() Link Here
102
102
103
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
103
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
104
104
105
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
105
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
106
  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
106
  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
107
  */
107
  */
108
inline void* handmade_aligned_malloc(std::size_t size)
108
inline void* handmade_aligned_malloc(std::size_t size)
109
{
109
{
110
  void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
110
  void *original = std::malloc(size+EIGEN_MAX_ALIGN_BYTES);
111
  if (original == 0) return 0;
111
  if (original == 0) return 0;
112
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
112
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES);
113
  *(reinterpret_cast<void**>(aligned) - 1) = original;
113
  *(reinterpret_cast<void**>(aligned) - 1) = original;
114
  return aligned;
114
  return aligned;
115
}
115
}
116
116
117
/** \internal Frees memory allocated with handmade_aligned_malloc */
117
/** \internal Frees memory allocated with handmade_aligned_malloc */
118
inline void handmade_aligned_free(void *ptr)
118
inline void handmade_aligned_free(void *ptr)
119
{
119
{
120
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
120
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
Lines 125-143 inline void handmade_aligned_free(void * Link Here
125
  * Since we know that our handmade version is based on std::realloc
125
  * Since we know that our handmade version is based on std::realloc
126
  * we can use std::realloc to implement efficient reallocation.
126
  * we can use std::realloc to implement efficient reallocation.
127
  */
127
  */
128
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
128
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
129
{
129
{
130
  if (ptr == 0) return handmade_aligned_malloc(size);
130
  if (ptr == 0) return handmade_aligned_malloc(size);
131
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
131
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
132
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
132
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
133
  original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
133
  original = std::realloc(original,size+EIGEN_MAX_ALIGN_BYTES);
134
  if (original == 0) return 0;
134
  if (original == 0) return 0;
135
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
135
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES);
136
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
136
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
137
  if(aligned!=previous_aligned)
137
  if(aligned!=previous_aligned)
138
    std::memmove(aligned, previous_aligned, size);
138
    std::memmove(aligned, previous_aligned, size);
139
  
139
  
140
  *(reinterpret_cast<void**>(aligned) - 1) = original;
140
  *(reinterpret_cast<void**>(aligned) - 1) = original;
141
  return aligned;
141
  return aligned;
142
}
142
}
143
143
Lines 213-252 EIGEN_DEVICE_FUNC inline void check_that Link Here
213
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
213
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
214
  * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
214
  * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
215
  */
215
  */
216
EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
216
EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
217
{
217
{
218
  check_that_malloc_is_allowed();
218
  check_that_malloc_is_allowed();
219
219
220
  void *result;
220
  void *result;
221
  #if !EIGEN_ALIGN
221
  #if EIGEN_MAX_ALIGN_BYTES==0
222
    result = std::malloc(size);
222
    result = std::malloc(size);
223
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
223
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
224
    result = std::malloc(size);
224
    result = std::malloc(size);
225
  #elif EIGEN_HAS_POSIX_MEMALIGN
225
  #elif EIGEN_HAS_POSIX_MEMALIGN
226
    if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
226
    if(posix_memalign(&result, EIGEN_MAX_ALIGN_BYTES, size)) result = 0;
227
  #elif EIGEN_HAS_MM_MALLOC
227
  #elif EIGEN_HAS_MM_MALLOC
228
    result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
228
    result = _mm_malloc(size, EIGEN_MAX_ALIGN_BYTES);
229
  #elif EIGEN_OS_WIN_STRICT
229
  #elif EIGEN_OS_WIN_STRICT
230
    result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
230
    result = _aligned_malloc(size, EIGEN_MAX_ALIGN_BYTES);
231
  #else
231
  #else
232
    result = handmade_aligned_malloc(size);
232
    result = handmade_aligned_malloc(size);
233
  #endif
233
  #endif
234
234
235
  if(!result && size)
235
  if(!result && size)
236
    throw_std_bad_alloc();
236
    throw_std_bad_alloc();
237
237
238
  return result;
238
  return result;
239
}
239
}
240
240
241
/** \internal Frees memory allocated with aligned_malloc. */
241
/** \internal Frees memory allocated with aligned_malloc. */
242
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
242
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
243
{
243
{
244
  #if !EIGEN_ALIGN
244
  #if EIGEN_MAX_ALIGN_BYTES==0
245
    std::free(ptr);
245
    std::free(ptr);
246
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
246
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
247
    std::free(ptr);
247
    std::free(ptr);
248
  #elif EIGEN_HAS_POSIX_MEMALIGN
248
  #elif EIGEN_HAS_POSIX_MEMALIGN
249
    std::free(ptr);
249
    std::free(ptr);
250
  #elif EIGEN_HAS_MM_MALLOC
250
  #elif EIGEN_HAS_MM_MALLOC
251
    _mm_free(ptr);
251
    _mm_free(ptr);
252
  #elif EIGEN_OS_WIN_STRICT
252
  #elif EIGEN_OS_WIN_STRICT
Lines 261-293 EIGEN_DEVICE_FUNC inline void aligned_fr Link Here
261
* \brief Reallocates an aligned block of memory.
261
* \brief Reallocates an aligned block of memory.
262
* \throws std::bad_alloc on allocation failure
262
* \throws std::bad_alloc on allocation failure
263
**/
263
**/
264
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
264
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
265
{
265
{
266
  EIGEN_UNUSED_VARIABLE(old_size);
266
  EIGEN_UNUSED_VARIABLE(old_size);
267
267
268
  void *result;
268
  void *result;
269
#if !EIGEN_ALIGN
269
#if EIGEN_MAX_ALIGN_BYTES==0
270
  result = std::realloc(ptr,new_size);
270
  result = std::realloc(ptr,new_size);
271
#elif EIGEN_MALLOC_ALREADY_ALIGNED
271
#elif EIGEN_MALLOC_ALREADY_ALIGNED
272
  result = std::realloc(ptr,new_size);
272
  result = std::realloc(ptr,new_size);
273
#elif EIGEN_HAS_POSIX_MEMALIGN
273
#elif EIGEN_HAS_POSIX_MEMALIGN
274
  result = generic_aligned_realloc(ptr,new_size,old_size);
274
  result = generic_aligned_realloc(ptr,new_size,old_size);
275
#elif EIGEN_HAS_MM_MALLOC
275
#elif EIGEN_HAS_MM_MALLOC
276
  // The defined(_mm_free) is just here to verify that this MSVC version
276
  // The defined(_mm_free) is just here to verify that this MSVC version
277
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
277
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
278
  // functions. This may not always be the case and we just try to be safe.
278
  // functions. This may not always be the case and we just try to be safe.
279
  #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
279
  #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
280
    result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
280
    result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES);
281
  #else
281
  #else
282
    result = generic_aligned_realloc(ptr,new_size,old_size);
282
    result = generic_aligned_realloc(ptr,new_size,old_size);
283
  #endif
283
  #endif
284
#elif EIGEN_OS_WIN_STRICT
284
#elif EIGEN_OS_WIN_STRICT
285
  result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
285
  result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES);
286
#else
286
#else
287
  result = handmade_aligned_realloc(ptr,new_size,old_size);
287
  result = handmade_aligned_realloc(ptr,new_size,old_size);
288
#endif
288
#endif
289
289
290
  if (!result && new_size)
290
  if (!result && new_size)
291
    throw_std_bad_alloc();
291
    throw_std_bad_alloc();
292
292
293
  return result;
293
  return result;
Lines 686-702 template<typename T> void swap(scoped_ar Link Here
686
  *   // use data[0] to data[size-1]
686
  *   // use data[0] to data[size-1]
687
  * }
687
  * }
688
  * \endcode
688
  * \endcode
689
  * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
689
  * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
690
  */
690
  */
691
#ifdef EIGEN_ALLOCA
691
#ifdef EIGEN_ALLOCA
692
  // We always manually re-align the result of EIGEN_ALLOCA.
692
  // We always manually re-align the result of EIGEN_ALLOCA.
693
  // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
693
  // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
694
  #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1)))
694
  #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_MAX_ALIGN_BYTES-1)) + EIGEN_MAX_ALIGN_BYTES-1) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1)))
695
695
696
  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
696
  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
697
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
697
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
698
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
698
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
699
               : reinterpret_cast<TYPE*>( \
699
               : reinterpret_cast<TYPE*>( \
700
                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
700
                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
701
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) );  \
701
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) );  \
702
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
702
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
Lines 710-726 template<typename T> void swap(scoped_ar Link Here
710
    
710
    
711
#endif
711
#endif
712
712
713
713
714
/*****************************************************************************
714
/*****************************************************************************
715
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
715
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
716
*****************************************************************************/
716
*****************************************************************************/
717
717
718
#if EIGEN_ALIGN
718
#if EIGEN_MAX_ALIGN_BYTES!=0
719
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
719
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
720
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
720
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
721
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
721
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
722
        EIGEN_CATCH (...) { return 0; } \
722
        EIGEN_CATCH (...) { return 0; } \
723
      }
723
      }
724
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
724
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
725
      void *operator new(size_t size) { \
725
      void *operator new(size_t size) { \
726
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
726
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
Lines 746-762 template<typename T> void swap(scoped_ar Link Here
746
      } \
746
      } \
747
      typedef void eigen_aligned_operator_new_marker_type;
747
      typedef void eigen_aligned_operator_new_marker_type;
748
#else
748
#else
749
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
749
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
750
#endif
750
#endif
751
751
752
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
752
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
753
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
753
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
754
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
754
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
755
755
756
/****************************************************************************/
756
/****************************************************************************/
757
757
758
/** \class aligned_allocator
758
/** \class aligned_allocator
759
* \ingroup Core_Module
759
* \ingroup Core_Module
760
*
760
*
761
* \brief STL compatible allocator to use with with 16 byte aligned types
761
* \brief STL compatible allocator to use with with 16 byte aligned types
762
*
762
*
(-)a/Eigen/src/Core/util/XprHelper.h (-2 / +2 lines)
Lines 162-186 class compute_matrix_evaluator_flags Link Here
162
      
162
      
163
      // TODO: should check for smaller packet types once we can handle multi-sized packet types
163
      // TODO: should check for smaller packet types once we can handle multi-sized packet types
164
      align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
164
      align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
165
165
166
      aligned_bit =
166
      aligned_bit =
167
      (
167
      (
168
            ((Options&DontAlign)==0)
168
            ((Options&DontAlign)==0)
169
        && (
169
        && (
170
#if EIGEN_ALIGN_STATICALLY
170
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
171
             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
171
             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
172
#else
172
#else
173
             0
173
             0
174
#endif
174
#endif
175
175
176
          ||
176
          ||
177
177
178
#if EIGEN_ALIGN
178
#if EIGEN_MAX_ALIGN_BYTES!=0
179
             is_dynamic_size_storage
179
             is_dynamic_size_storage
180
#else
180
#else
181
             0
181
             0
182
#endif
182
#endif
183
183
184
          )
184
          )
185
      ) ? AlignedBit : 0,
185
      ) ? AlignedBit : 0,
186
      packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
186
      packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
(-)a/test/sizeof.cpp (+11 lines)
Lines 16-31 template<typename MatrixType> void verif Link Here
16
    VERIFY_IS_EQUAL(std::ptrdiff_t(sizeof(MatrixType)),std::ptrdiff_t(sizeof(Scalar))*std::ptrdiff_t(MatrixType::SizeAtCompileTime));
16
    VERIFY_IS_EQUAL(std::ptrdiff_t(sizeof(MatrixType)),std::ptrdiff_t(sizeof(Scalar))*std::ptrdiff_t(MatrixType::SizeAtCompileTime));
17
  else
17
  else
18
    VERIFY_IS_EQUAL(sizeof(MatrixType),sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index));
18
    VERIFY_IS_EQUAL(sizeof(MatrixType),sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index));
19
}
19
}
20
20
21
void test_sizeof()
21
void test_sizeof()
22
{
22
{
23
  CALL_SUBTEST(verifySizeOf(Matrix<float, 1, 1>()) );
23
  CALL_SUBTEST(verifySizeOf(Matrix<float, 1, 1>()) );
24
  CALL_SUBTEST(verifySizeOf(Array<float, 2, 1>()) );
25
  CALL_SUBTEST(verifySizeOf(Array<float, 3, 1>()) );
26
  CALL_SUBTEST(verifySizeOf(Array<float, 4, 1>()) );
27
  CALL_SUBTEST(verifySizeOf(Array<float, 5, 1>()) );
28
  CALL_SUBTEST(verifySizeOf(Array<float, 6, 1>()) );
29
  CALL_SUBTEST(verifySizeOf(Array<float, 7, 1>()) );
30
  CALL_SUBTEST(verifySizeOf(Array<float, 8, 1>()) );
31
  CALL_SUBTEST(verifySizeOf(Array<float, 9, 1>()) );
32
  CALL_SUBTEST(verifySizeOf(Array<float, 10, 1>()) );
33
  CALL_SUBTEST(verifySizeOf(Array<float, 11, 1>()) );
34
  CALL_SUBTEST(verifySizeOf(Array<float, 12, 1>()) );
24
  CALL_SUBTEST(verifySizeOf(Vector2d()) );
35
  CALL_SUBTEST(verifySizeOf(Vector2d()) );
25
  CALL_SUBTEST(verifySizeOf(Vector4f()) );
36
  CALL_SUBTEST(verifySizeOf(Vector4f()) );
26
  CALL_SUBTEST(verifySizeOf(Matrix4d()) );
37
  CALL_SUBTEST(verifySizeOf(Matrix4d()) );
27
  CALL_SUBTEST(verifySizeOf(Matrix<double, 4, 2>()) );
38
  CALL_SUBTEST(verifySizeOf(Matrix<double, 4, 2>()) );
28
  CALL_SUBTEST(verifySizeOf(Matrix<bool, 7, 5>()) );
39
  CALL_SUBTEST(verifySizeOf(Matrix<bool, 7, 5>()) );
29
  CALL_SUBTEST(verifySizeOf(MatrixXcf(3, 3)) );
40
  CALL_SUBTEST(verifySizeOf(MatrixXcf(3, 3)) );
30
  CALL_SUBTEST(verifySizeOf(MatrixXi(8, 12)) );
41
  CALL_SUBTEST(verifySizeOf(MatrixXi(8, 12)) );
31
  CALL_SUBTEST(verifySizeOf(MatrixXcd(20, 20)) );
42
  CALL_SUBTEST(verifySizeOf(MatrixXcd(20, 20)) );
(-)a/test/unalignedassert.cpp (-15 / +29 lines)
Lines 1-17 Link Here
1
// This file is part of Eigen, a lightweight C++ template library
1
// This file is part of Eigen, a lightweight C++ template library
2
// for linear algebra.
2
// for linear algebra.
3
//
3
//
4
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
4
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5
//
6
//
6
// This Source Code Form is subject to the terms of the Mozilla
7
// This Source Code Form is subject to the terms of the Mozilla
7
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10
11
#if defined(EIGEN_TEST_PART_1)
12
  // default
13
#elif defined(EIGEN_TEST_PART_2)
14
  #define EIGEN_MAX_STATIC_ALIGN_BYTES 16
15
  #define EIGEN_MAX_ALIGN_BYTES 16
16
#elif defined(EIGEN_TEST_PART_3)
17
  #define EIGEN_MAX_STATIC_ALIGN_BYTES 32
18
  #define EIGEN_MAX_ALIGN_BYTES 32
19
#elif defined(EIGEN_TEST_PART_4)
20
  #define EIGEN_MAX_STATIC_ALIGN_BYTES 64
21
  #define EIGEN_MAX_ALIGN_BYTES 64
22
#endif
23
10
#include "main.h"
24
#include "main.h"
11
25
12
typedef Matrix<float,  6,1> Vector6f;
26
typedef Matrix<float,  6,1> Vector6f;
13
typedef Matrix<float,  8,1> Vector8f;
27
typedef Matrix<float,  8,1> Vector8f;
14
typedef Matrix<float, 12,1> Vector12f;
28
typedef Matrix<float, 12,1> Vector12f;
15
29
16
typedef Matrix<double, 5,1> Vector5d;
30
typedef Matrix<double, 5,1> Vector5d;
17
typedef Matrix<double, 6,1> Vector6d;
31
typedef Matrix<double, 6,1> Vector6d;
Lines 43-59 struct TestNew4 Link Here
43
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
57
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
44
  Vector2d m;
58
  Vector2d m;
45
  float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects
59
  float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects
46
};
60
};
47
61
48
struct TestNew5
62
struct TestNew5
49
{
63
{
50
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
64
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
51
  float f; // try the f at first -- the EIGEN_ALIGN16 attribute of m should make that still work
65
  float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work
52
  Matrix4f m;
66
  Matrix4f m;
53
};
67
};
54
68
55
struct TestNew6
69
struct TestNew6
56
{
70
{
57
  Matrix<float,2,2,DontAlign> m; // good: no alignment requested
71
  Matrix<float,2,2,DontAlign> m; // good: no alignment requested
58
  float f;
72
  float f;
59
};
73
};
Lines 70-157 void check_unalignedassert_good() Link Here
70
{
84
{
71
  T *x, *y;
85
  T *x, *y;
72
  x = new T;
86
  x = new T;
73
  delete x;
87
  delete x;
74
  y = new T[2];
88
  y = new T[2];
75
  delete[] y;
89
  delete[] y;
76
}
90
}
77
91
78
#if EIGEN_ALIGN_STATICALLY
92
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
79
template<typename T>
93
template<typename T>
80
void construct_at_boundary(int boundary)
94
void construct_at_boundary(int boundary)
81
{
95
{
82
  char buf[sizeof(T)+256];
96
  char buf[sizeof(T)+256];
83
  size_t _buf = reinterpret_cast<size_t>(buf);
97
  size_t _buf = reinterpret_cast<size_t>(buf);
84
  _buf += (EIGEN_ALIGN_BYTES - (_buf % EIGEN_ALIGN_BYTES)); // make 16/32-byte aligned
98
  _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned
85
  _buf += boundary; // make exact boundary-aligned
99
  _buf += boundary; // make exact boundary-aligned
86
  T *x = ::new(reinterpret_cast<void*>(_buf)) T;
100
  T *x = ::new(reinterpret_cast<void*>(_buf)) T;
87
  x[0].setZero(); // just in order to silence warnings
101
  x[0].setZero(); // just in order to silence warnings
88
  x->~T();
102
  x->~T();
89
}
103
}
90
#endif
104
#endif
91
105
92
void unalignedassert()
106
void unalignedassert()
93
{
107
{
94
#if EIGEN_ALIGN_STATICALLY
108
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
95
  construct_at_boundary<Vector2f>(4);
109
  construct_at_boundary<Vector2f>(4);
96
  construct_at_boundary<Vector3f>(4);
110
  construct_at_boundary<Vector3f>(4);
97
  construct_at_boundary<Vector4f>(16);
111
  construct_at_boundary<Vector4f>(16);
98
  construct_at_boundary<Vector6f>(4);
112
  construct_at_boundary<Vector6f>(4);
99
  construct_at_boundary<Vector8f>(EIGEN_ALIGN_BYTES);
113
  construct_at_boundary<Vector8f>(EIGEN_MAX_ALIGN_BYTES);
100
  construct_at_boundary<Vector12f>(16);
114
  construct_at_boundary<Vector12f>(16);
101
  construct_at_boundary<Matrix2f>(16);
115
  construct_at_boundary<Matrix2f>(16);
102
  construct_at_boundary<Matrix3f>(4);
116
  construct_at_boundary<Matrix3f>(4);
103
  construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES);
117
  construct_at_boundary<Matrix4f>(EIGEN_MAX_ALIGN_BYTES);
104
118
105
  construct_at_boundary<Vector2d>(16);
119
  construct_at_boundary<Vector2d>(16);
106
  construct_at_boundary<Vector3d>(4);
120
  construct_at_boundary<Vector3d>(4);
107
  construct_at_boundary<Vector4d>(EIGEN_ALIGN_BYTES);
121
  construct_at_boundary<Vector4d>(EIGEN_MAX_ALIGN_BYTES);
108
  construct_at_boundary<Vector5d>(4);
122
  construct_at_boundary<Vector5d>(4);
109
  construct_at_boundary<Vector6d>(16);
123
  construct_at_boundary<Vector6d>(16);
110
  construct_at_boundary<Vector7d>(4);
124
  construct_at_boundary<Vector7d>(4);
111
  construct_at_boundary<Vector8d>(EIGEN_ALIGN_BYTES);
125
  construct_at_boundary<Vector8d>(EIGEN_MAX_ALIGN_BYTES);
112
  construct_at_boundary<Vector9d>(4);
126
  construct_at_boundary<Vector9d>(4);
113
  construct_at_boundary<Vector10d>(16);
127
  construct_at_boundary<Vector10d>(16);
114
  construct_at_boundary<Vector12d>(EIGEN_ALIGN_BYTES);
128
  construct_at_boundary<Vector12d>(EIGEN_MAX_ALIGN_BYTES);
115
  construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES);
129
  construct_at_boundary<Matrix2d>(EIGEN_MAX_ALIGN_BYTES);
116
  construct_at_boundary<Matrix3d>(4);
130
  construct_at_boundary<Matrix3d>(4);
117
  construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES);
131
  construct_at_boundary<Matrix4d>(EIGEN_MAX_ALIGN_BYTES);
118
132
119
  construct_at_boundary<Vector2cf>(16);
133
  construct_at_boundary<Vector2cf>(16);
120
  construct_at_boundary<Vector3cf>(4);
134
  construct_at_boundary<Vector3cf>(4);
121
  construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
135
  construct_at_boundary<Vector2cd>(EIGEN_MAX_ALIGN_BYTES);
122
  construct_at_boundary<Vector3cd>(16);
136
  construct_at_boundary<Vector3cd>(16);
123
#endif
137
#endif
124
138
125
  check_unalignedassert_good<TestNew1>();
139
  check_unalignedassert_good<TestNew1>();
126
  check_unalignedassert_good<TestNew2>();
140
  check_unalignedassert_good<TestNew2>();
127
  check_unalignedassert_good<TestNew3>();
141
  check_unalignedassert_good<TestNew3>();
128
142
129
  check_unalignedassert_good<TestNew4>();
143
  check_unalignedassert_good<TestNew4>();
130
  check_unalignedassert_good<TestNew5>();
144
  check_unalignedassert_good<TestNew5>();
131
  check_unalignedassert_good<TestNew6>();
145
  check_unalignedassert_good<TestNew6>();
132
  check_unalignedassert_good<Depends<true> >();
146
  check_unalignedassert_good<Depends<true> >();
133
147
134
#if EIGEN_ALIGN_STATICALLY
148
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
135
  if(EIGEN_ALIGN_BYTES>=16)
149
  if(EIGEN_MAX_ALIGN_BYTES>=16)
136
  {
150
  {
137
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
151
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
138
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
152
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
139
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12f>(8));
153
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12f>(8));
140
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
154
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
141
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8));
155
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8));
142
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector6d>(8));
156
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector6d>(8));
143
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8d>(8));
157
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8d>(8));
144
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector10d>(8));
158
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector10d>(8));
145
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12d>(8));
159
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12d>(8));
146
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
160
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
147
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
161
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
148
  }
162
  }
149
  for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
163
  for(int b=8; b<EIGEN_MAX_ALIGN_BYTES; b+=8)
150
  {
164
  {
151
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
165
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
152
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));
166
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));
153
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b));
167
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b));
154
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b));
168
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b));
155
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b));
169
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b));
156
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cd>(b));
170
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cd>(b));
157
  }
171
  }

Return to bug 973