This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 256
Collapse All | Expand All

(-)a/Eigen/src/Core/AssignEvaluator.h (-11 / +15 lines)
Lines 76-95 private: Link Here
76
    SrcIsRowMajor = SrcFlags&RowMajorBit,
76
    SrcIsRowMajor = SrcFlags&RowMajorBit,
77
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
77
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
78
    MightVectorize = bool(StorageOrdersAgree)
78
    MightVectorize = bool(StorageOrdersAgree)
79
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
79
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
80
                  && bool(functor_traits<AssignFunc>::PacketAccess),
80
                  && bool(functor_traits<AssignFunc>::PacketAccess),
81
    MayInnerVectorize  = MightVectorize
81
    MayInnerVectorize  = MightVectorize
82
                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
82
                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
83
                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
83
                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
84
                       && int(JointAlignment)>=int(InnerRequiredAlignment),
84
                       && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),
85
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
85
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
86
    MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
86
    MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
87
                       && ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
87
                       && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
88
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
88
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
89
         so it's only good for large enough sizes. */
89
         so it's only good for large enough sizes. */
90
    MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
90
    MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
91
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
91
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
92
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
92
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
93
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
93
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
94
         in a fixed-size matrix */
94
         in a fixed-size matrix */
95
  };
95
  };
Lines 125-142 public: Link Here
125
  enum {
125
  enum {
126
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
126
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
127
                ? (
127
                ? (
128
                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
128
                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
129
                  : int(MayUnrollInner)      ? int(InnerUnrolling)
129
                  : int(MayUnrollInner)      ? int(InnerUnrolling)
130
                                             : int(NoUnrolling)
130
                                             : int(NoUnrolling)
131
                  )
131
                  )
132
              : int(Traversal) == int(LinearVectorizedTraversal)
132
              : int(Traversal) == int(LinearVectorizedTraversal)
133
                ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling)
133
                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
134
                                                                                             : int(NoUnrolling) )
134
                          ? int(CompleteUnrolling)
135
                          : int(NoUnrolling) )
135
              : int(Traversal) == int(LinearTraversal)
136
              : int(Traversal) == int(LinearTraversal)
136
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
137
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
137
                                              : int(NoUnrolling) )
138
                                              : int(NoUnrolling) )
138
              : int(NoUnrolling)
139
              : int(NoUnrolling)
139
  };
140
  };
140
141
141
#ifdef EIGEN_DEBUG_ASSIGN
142
#ifdef EIGEN_DEBUG_ASSIGN
142
  static void debug()
143
  static void debug()
Lines 151-166 public: Link Here
151
    EIGEN_DEBUG_VAR(SrcAlignment)
152
    EIGEN_DEBUG_VAR(SrcAlignment)
152
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
153
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
153
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
154
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
154
    EIGEN_DEBUG_VAR(JointAlignment)
155
    EIGEN_DEBUG_VAR(JointAlignment)
155
    EIGEN_DEBUG_VAR(InnerSize)
156
    EIGEN_DEBUG_VAR(InnerSize)
156
    EIGEN_DEBUG_VAR(InnerMaxSize)
157
    EIGEN_DEBUG_VAR(InnerMaxSize)
157
    EIGEN_DEBUG_VAR(LinearPacketSize)
158
    EIGEN_DEBUG_VAR(LinearPacketSize)
158
    EIGEN_DEBUG_VAR(InnerPacketSize)
159
    EIGEN_DEBUG_VAR(InnerPacketSize)
160
    EIGEN_DEBUG_VAR(ActualPacketSize)
159
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
161
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
160
    EIGEN_DEBUG_VAR(MightVectorize)
162
    EIGEN_DEBUG_VAR(MightVectorize)
161
    EIGEN_DEBUG_VAR(MayLinearize)
163
    EIGEN_DEBUG_VAR(MayLinearize)
162
    EIGEN_DEBUG_VAR(MayInnerVectorize)
164
    EIGEN_DEBUG_VAR(MayInnerVectorize)
163
    EIGEN_DEBUG_VAR(MayLinearVectorize)
165
    EIGEN_DEBUG_VAR(MayLinearVectorize)
164
    EIGEN_DEBUG_VAR(MaySliceVectorize)
166
    EIGEN_DEBUG_VAR(MaySliceVectorize)
165
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
167
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
166
    EIGEN_DEBUG_VAR(UnrollingLimit)
168
    EIGEN_DEBUG_VAR(UnrollingLimit)
Lines 251-294 struct copy_using_evaluator_innervec_Com Link Here
251
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
253
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
252
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
254
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
253
  typedef typename DstEvaluatorType::XprType DstXprType;
255
  typedef typename DstEvaluatorType::XprType DstXprType;
254
  typedef typename Kernel::PacketType PacketType;
256
  typedef typename Kernel::PacketType PacketType;
255
  
257
  
256
  enum {
258
  enum {
257
    outer = Index / DstXprType::InnerSizeAtCompileTime,
259
    outer = Index / DstXprType::InnerSizeAtCompileTime,
258
    inner = Index % DstXprType::InnerSizeAtCompileTime,
260
    inner = Index % DstXprType::InnerSizeAtCompileTime,
259
    JointAlignment = Kernel::AssignmentTraits::JointAlignment,
261
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
260
    DefaultAlignment = unpacket_traits<PacketType>::alignment
262
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
261
  };
263
  };
262
264
263
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
265
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
264
  {
266
  {
265
    kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
267
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
266
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
268
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
267
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
269
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
268
  }
270
  }
269
};
271
};
270
272
271
template<typename Kernel, int Stop>
273
template<typename Kernel, int Stop>
272
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
274
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
273
{
275
{
274
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
276
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
275
};
277
};
276
278
277
template<typename Kernel, int Index_, int Stop>
279
template<typename Kernel, int Index_, int Stop>
278
struct copy_using_evaluator_innervec_InnerUnrolling
280
struct copy_using_evaluator_innervec_InnerUnrolling
279
{
281
{
280
  typedef typename Kernel::PacketType PacketType;
282
  typedef typename Kernel::PacketType PacketType;
281
  enum {
283
  enum {
282
    DefaultAlignment = unpacket_traits<PacketType>::alignment
284
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
285
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
283
  };
286
  };
284
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
287
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
285
  {
288
  {
286
    kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
289
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
287
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
290
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
288
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
291
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
289
  }
292
  }
290
};
293
};
291
294
292
template<typename Kernel, int Stop>
295
template<typename Kernel, int Stop>
293
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
296
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
294
{
297
{
Lines 433-458 struct dense_assignment_loop<Kernel, Lin Link Here
433
*** Inner vectorization ***
436
*** Inner vectorization ***
434
**************************/
437
**************************/
435
438
436
template<typename Kernel>
439
template<typename Kernel>
437
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
440
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
438
{
441
{
439
  typedef typename Kernel::PacketType PacketType;
442
  typedef typename Kernel::PacketType PacketType;
440
  enum {
443
  enum {
441
    DefaultAlignment = unpacket_traits<PacketType>::alignment
444
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
445
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
442
  };
446
  };
443
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
447
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
444
  {
448
  {
445
    const Index innerSize = kernel.innerSize();
449
    const Index innerSize = kernel.innerSize();
446
    const Index outerSize = kernel.outerSize();
450
    const Index outerSize = kernel.outerSize();
447
    const Index packetSize = unpacket_traits<PacketType>::size;
451
    const Index packetSize = unpacket_traits<PacketType>::size;
448
    for(Index outer = 0; outer < outerSize; ++outer)
452
    for(Index outer = 0; outer < outerSize; ++outer)
449
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
453
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
450
        kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
454
        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
451
  }
455
  }
452
};
456
};
453
457
454
template<typename Kernel>
458
template<typename Kernel>
455
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
459
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
456
{
460
{
457
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
461
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
458
  {
462
  {
(-)a/Eigen/src/Core/Matrix.h (-1 / +1 lines)
Lines 22-38 private: Link Here
22
  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
22
  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
23
  enum {
23
  enum {
24
      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
24
      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
25
      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
25
      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
26
      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
26
      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
27
      default_alignment = compute_default_alignment<_Scalar,max_size>::value,
27
      default_alignment = compute_default_alignment<_Scalar,max_size>::value,
28
      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
28
      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
29
      required_alignment = unpacket_traits<PacketScalar>::alignment,
29
      required_alignment = unpacket_traits<PacketScalar>::alignment,
30
      packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
30
      packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
31
    };
31
    };
32
    
32
    
33
public:
33
public:
34
  typedef _Scalar Scalar;
34
  typedef _Scalar Scalar;
35
  typedef Dense StorageKind;
35
  typedef Dense StorageKind;
36
  typedef Eigen::Index StorageIndex;
36
  typedef Eigen::Index StorageIndex;
37
  typedef MatrixXpr XprKind;
37
  typedef MatrixXpr XprKind;
38
  enum {
38
  enum {
(-)a/Eigen/src/Core/util/Macros.h (+5 lines)
Lines 751-766 namespace Eigen { Link Here
751
#endif
751
#endif
752
752
753
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
753
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
754
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
754
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
755
#else
755
#else
756
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
756
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
757
#endif
757
#endif
758
758
759
760
#ifndef EIGEN_UNALIGNED_VECTORIZE
761
#define EIGEN_UNALIGNED_VECTORIZE 1
762
#endif
763
759
//----------------------------------------------------------------------
764
//----------------------------------------------------------------------
760
765
761
766
762
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
767
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
763
  #define EIGEN_RESTRICT
768
  #define EIGEN_RESTRICT
764
#endif
769
#endif
765
#ifndef EIGEN_RESTRICT
770
#ifndef EIGEN_RESTRICT
766
  #define EIGEN_RESTRICT __restrict
771
  #define EIGEN_RESTRICT __restrict
(-)a/test/vectorization_logic.cpp (-11 / +33 lines)
Lines 2-17 Link Here
2
// for linear algebra.
2
// for linear algebra.
3
//
3
//
4
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
4
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5
//
5
//
6
// This Source Code Form is subject to the terms of the Mozilla
6
// This Source Code Form is subject to the terms of the Mozilla
7
// Public License v. 2.0. If a copy of the MPL was not distributed
7
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
9
10
#ifdef EIGEN_TEST_PART_1
11
#define EIGEN_UNALIGNED_VECTORIZE 1
12
#endif
13
14
#ifdef EIGEN_TEST_PART_2
15
#define EIGEN_UNALIGNED_VECTORIZE 0
16
#endif
17
10
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
18
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
11
#undef EIGEN_DEFAULT_TO_ROW_MAJOR
19
#undef EIGEN_DEFAULT_TO_ROW_MAJOR
12
#endif
20
#endif
13
#define EIGEN_DEBUG_ASSIGN
21
#define EIGEN_DEBUG_ASSIGN
14
#include "main.h"
22
#include "main.h"
15
#include <typeinfo>
23
#include <typeinfo>
16
24
17
using internal::demangle_flags;
25
using internal::demangle_flags;
Lines 139-158 struct vectorization_logic Link Here
139
      InnerVectorizedTraversal,CompleteUnrolling));
147
      InnerVectorizedTraversal,CompleteUnrolling));
140
    VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
148
    VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
141
      InnerVectorizedTraversal,CompleteUnrolling));
149
      InnerVectorizedTraversal,CompleteUnrolling));
142
150
143
    VERIFY(test_assign(Matrix44(),Matrix44()+Matrix44(),
151
    VERIFY(test_assign(Matrix44(),Matrix44()+Matrix44(),
144
      InnerVectorizedTraversal,InnerUnrolling));
152
      InnerVectorizedTraversal,InnerUnrolling));
145
153
146
    VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(),
154
    VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(),
147
      LinearTraversal,NoUnrolling));
155
      EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal,
156
      EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
157
158
    VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(),
159
      (Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal,
160
      CompleteUnrolling));
148
161
149
    VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
162
    VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
150
      LinearTraversal,CompleteUnrolling));
163
      EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal, CompleteUnrolling));
151
164
152
    VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
165
    VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
153
      InnerVectorizedTraversal,CompleteUnrolling));
166
      InnerVectorizedTraversal,CompleteUnrolling));
154
167
155
    VERIFY(test_assign(Matrix44r().row(2),Matrix44r().row(1)+Matrix44r().row(1),
168
    VERIFY(test_assign(Matrix44r().row(2),Matrix44r().row(1)+Matrix44r().row(1),
156
      InnerVectorizedTraversal,CompleteUnrolling));
169
      InnerVectorizedTraversal,CompleteUnrolling));
157
170
158
    if(PacketSize>1)
171
    if(PacketSize>1)
Lines 162-181 struct vectorization_logic Link Here
162
        LinearTraversal,CompleteUnrolling));
175
        LinearTraversal,CompleteUnrolling));
163
      VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
176
      VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
164
        LinearTraversal,CompleteUnrolling));
177
        LinearTraversal,CompleteUnrolling));
165
178
166
      VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()),
179
      VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()),
167
        LinearVectorizedTraversal,CompleteUnrolling));
180
        LinearVectorizedTraversal,CompleteUnrolling));
168
181
169
      VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
182
      VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
170
        HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal,NoUnrolling));
183
        HalfPacketSize==1             ? InnerVectorizedTraversal  :
184
        EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal :
185
                                        LinearTraversal,
186
        NoUnrolling));
187
188
      VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling));
189
171
190
172
      VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
191
      VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
173
        DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
192
        (EIGEN_UNALIGNED_VECTORIZE) ? InnerVectorizedTraversal : DefaultTraversal,
193
        (EIGEN_UNALIGNED_VECTORIZE || PacketSize<=4) ? CompleteUnrolling : InnerUnrolling ));
174
194
175
      VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
195
      VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
176
                         InnerVectorizedTraversal,CompleteUnrolling));
196
                         InnerVectorizedTraversal,CompleteUnrolling));
177
197
178
      VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()),
198
      VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()),
179
                         InnerVectorizedTraversal,InnerUnrolling+CompleteUnrolling));
199
                         InnerVectorizedTraversal,InnerUnrolling+CompleteUnrolling));
180
    }
200
    }
181
201
Lines 282-315 struct vectorization_logic_half Link Here
282
      InnerVectorizedTraversal,CompleteUnrolling));
302
      InnerVectorizedTraversal,CompleteUnrolling));
283
    VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
303
    VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
284
      InnerVectorizedTraversal,CompleteUnrolling));
304
      InnerVectorizedTraversal,CompleteUnrolling));
285
305
286
    VERIFY(test_assign(Matrix57(),Matrix57()+Matrix57(),
306
    VERIFY(test_assign(Matrix57(),Matrix57()+Matrix57(),
287
      InnerVectorizedTraversal,InnerUnrolling));
307
      InnerVectorizedTraversal,InnerUnrolling));
288
308
289
    VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(),
309
    VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(),
290
      LinearTraversal,NoUnrolling));
310
      EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal,
311
      EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
291
312
292
    VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
313
    VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
293
      LinearTraversal,CompleteUnrolling));
314
      EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
294
        
315
        
295
    if(PacketSize>1)
316
    if(PacketSize>1)
296
    {
317
    {
297
      typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
318
      typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
298
      VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
319
      VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
299
        LinearTraversal,CompleteUnrolling));
320
        LinearTraversal,CompleteUnrolling));
300
      VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
321
      VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
301
        LinearTraversal,CompleteUnrolling));
322
        LinearTraversal,CompleteUnrolling));
302
              
323
              
303
      VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
324
      VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
304
        PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
325
        PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
305
        
326
        
306
      VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
327
      VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
307
        LinearTraversal,NoUnrolling));
328
        EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,
329
        NoUnrolling));
308
        
330
        
309
      VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
331
      VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
310
        DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
332
        DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
311
333
312
      VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
334
      VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
313
                         InnerVectorizedTraversal,CompleteUnrolling));
335
                         InnerVectorizedTraversal,CompleteUnrolling));
314
336
315
      VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()),
337
      VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()),
Lines 362-386 void test_vectorization_logic() Link Here
362
  CALL_SUBTEST( vectorization_logic_half<float>::run() );
384
  CALL_SUBTEST( vectorization_logic_half<float>::run() );
363
  CALL_SUBTEST( vectorization_logic_half<double>::run() );
385
  CALL_SUBTEST( vectorization_logic_half<double>::run() );
364
  CALL_SUBTEST( vectorization_logic_half<std::complex<float> >::run() );
386
  CALL_SUBTEST( vectorization_logic_half<std::complex<float> >::run() );
365
  CALL_SUBTEST( vectorization_logic_half<std::complex<double> >::run() );
387
  CALL_SUBTEST( vectorization_logic_half<std::complex<double> >::run() );
366
  
388
  
367
  if(internal::packet_traits<float>::Vectorizable)
389
  if(internal::packet_traits<float>::Vectorizable)
368
  {
390
  {
369
    VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
391
    VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
370
      LinearTraversal,CompleteUnrolling));
392
      EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
371
      
393
      
372
    VERIFY(test_redux(Matrix<float,5,2>(),
394
    VERIFY(test_redux(Matrix<float,5,2>(),
373
      DefaultTraversal,CompleteUnrolling));
395
      EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
374
  }
396
  }
375
  
397
  
376
  if(internal::packet_traits<double>::Vectorizable)
398
  if(internal::packet_traits<double>::Vectorizable)
377
  {
399
  {
378
    VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
400
    VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
379
      LinearTraversal,CompleteUnrolling));
401
      EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
380
    
402
    
381
    VERIFY(test_redux(Matrix<double,7,3>(),
403
    VERIFY(test_redux(Matrix<double,7,3>(),
382
      DefaultTraversal,CompleteUnrolling));
404
      EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
383
  }
405
  }
384
#endif // EIGEN_VECTORIZE
406
#endif // EIGEN_VECTORIZE
385
407
386
}
408
}

Return to bug 256