Eigen  3.3.90 (mercurial changeset d63bd3cb7378)
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Redux.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 // * implement other kind of vectorization
20 // * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
/** \internal
  * Decides, at compile time, which traversal strategy (default scalar,
  * linear vectorized, or slice vectorized) and which unrolling strategy
  * to use when reducing the expression \a Derived with the binary
  * functor \a Func.
  */
template<typename Func, typename Derived>
struct redux_traits
{
public:
  // Best SIMD packet type for Derived's scalar type and compile-time size.
  typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Compile-time upper bound on the length of one inner vector
    // (a row if row-major, a column otherwise).
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    // Vectorization requires packet access on the expression AND a functor
    // that provides a packet-wise implementation.
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    // Linear (1D) vectorization additionally requires linear coefficient indexing.
    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
    // Per-inner-vector vectorization only pays off when an inner vector can
    // hold at least 3 packets.
    MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    // Prefer linear vectorization, then slice vectorization, else scalar traversal.
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated cost of the whole reduction: read every coefficient once and
    // apply the functor (size-1) times. HugeCost disables complete unrolling
    // for dynamic-size expressions.
    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Vectorized traversals get a proportionally larger unrolling budget.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decisions above to stderr (Flags printed in hexadecimal).
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};
83 
84 /***************************************************************************
85 * Part 2 : unrollers
86 ***************************************************************************/
87 
88 /*** no vectorization ***/
89 
90 template<typename Func, typename Derived, int Start, int Length>
91 struct redux_novec_unroller
92 {
93  enum {
94  HalfLength = Length/2
95  };
96 
97  typedef typename Derived::Scalar Scalar;
98 
99  EIGEN_DEVICE_FUNC
100  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
101  {
102  return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
103  redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
104  }
105 };
106 
107 template<typename Func, typename Derived, int Start>
108 struct redux_novec_unroller<Func, Derived, Start, 1>
109 {
110  enum {
111  outer = Start / Derived::InnerSizeAtCompileTime,
112  inner = Start % Derived::InnerSizeAtCompileTime
113  };
114 
115  typedef typename Derived::Scalar Scalar;
116 
117  EIGEN_DEVICE_FUNC
118  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
119  {
120  return mat.coeffByOuterInner(outer, inner);
121  }
122 };
123 
124 // This is actually dead code and will never be called. It is required
125 // to prevent false warnings regarding failed inlining though
126 // for 0 length run() will never be called at all.
127 template<typename Func, typename Derived, int Start>
128 struct redux_novec_unroller<Func, Derived, Start, 0>
129 {
130  typedef typename Derived::Scalar Scalar;
131  EIGEN_DEVICE_FUNC
132  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
133 };
134 
135 /*** vectorization ***/
136 
137 template<typename Func, typename Derived, int Start, int Length>
138 struct redux_vec_unroller
139 {
140  enum {
141  PacketSize = redux_traits<Func, Derived>::PacketSize,
142  HalfLength = Length/2
143  };
144 
145  typedef typename Derived::Scalar Scalar;
146  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
147 
148  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
149  {
150  return func.packetOp(
151  redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
152  redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
153  }
154 };
155 
156 template<typename Func, typename Derived, int Start>
157 struct redux_vec_unroller<Func, Derived, Start, 1>
158 {
159  enum {
160  index = Start * redux_traits<Func, Derived>::PacketSize,
161  outer = index / int(Derived::InnerSizeAtCompileTime),
162  inner = index % int(Derived::InnerSizeAtCompileTime),
163  alignment = Derived::Alignment
164  };
165 
166  typedef typename Derived::Scalar Scalar;
167  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
168 
169  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
170  {
171  return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
172  }
173 };
174 
175 /***************************************************************************
176 * Part 3 : implementation of all cases
177 ***************************************************************************/
178 
// Generic reduction implementation, dispatched on the traversal and unrolling
// strategies computed by redux_traits (used as default template arguments).
// Only the specializations below are defined.
template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;
184 
185 template<typename Func, typename Derived>
186 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
187 {
188  typedef typename Derived::Scalar Scalar;
189  EIGEN_DEVICE_FUNC
190  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
191  {
192  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
193  Scalar res;
194  res = mat.coeffByOuterInner(0, 0);
195  for(Index i = 1; i < mat.innerSize(); ++i)
196  res = func(res, mat.coeffByOuterInner(0, i));
197  for(Index i = 1; i < mat.outerSize(); ++i)
198  for(Index j = 0; j < mat.innerSize(); ++j)
199  res = func(res, mat.coeffByOuterInner(i, j));
200  return res;
201  }
202 };
203 
// Fully unrolled scalar reduction: simply inherit the recursive scalar
// unroller covering the whole compile-time coefficient range.
template<typename Func, typename Derived>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};
208 
// Linearly vectorized reduction without unrolling: the bulk of the
// coefficients is processed packet-wise (with two accumulators to expose
// instruction-level parallelism), while the unaligned head and the leftover
// tail are folded in coefficient-wise.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();

    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // With direct access and scalar-aligned packets we can reach full packet
      // alignment by skipping a scalar head; otherwise rely on the
      // expression's own static alignment.
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    // First linear index from which default-aligned packet loads are possible.
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    // Aligned range rounded down to whole multiples of 2 packets / 1 packet.
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        // Two independent accumulators break the dependency chain between
        // consecutive iterations.
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        // At most one odd packet remains between alignedEnd2 and alignedEnd.
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      // Horizontal reduction of the packet accumulator down to one scalar.
      res = func.predux(packet_res0);

      // Unaligned head, handled coefficient-wise.
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      // Tail that does not fill a whole packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};
266 
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Each inner vector (row or column) is vectorized independently with
// unaligned packet loads; the per-inner-vector remainders are folded in
// coefficient-wise afterwards.
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    // Inner size rounded down to a whole number of packets.
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      // Seed the packet accumulator with the first packet of the first inner
      // vector, then sweep the packeted part of every inner vector
      // (skipping that already-consumed first packet when j==0).
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      // Scalar cleanup for the unpacketed tail of each inner vector.
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};
305 
// Fully unrolled, linearly vectorized reduction: the packeted part is handled
// by redux_vec_unroller, the scalar remainder by redux_novec_unroller.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;

  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    // Number of coefficients covered by whole packets.
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      // Reduce all whole packets, then collapse the packet result to a scalar.
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        // Fold in the scalar remainder that does not fill a packet.
        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      // Fewer coefficients than one packet: scalar unrolling only.
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
};
331 
// evaluator adaptor
// Wraps an expression together with its evaluator behind the interface the
// redux_impl specializations above expect: sizes, coefficient and packet
// access (both by linear index and by outer/inner coordinates), and the
// nested expression needed for the runtime alignment query
// (first_default_aligned) in the linear vectorized path.
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

  // Sizes are forwarded to the expression itself, not to the evaluator.
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  // Coefficient access by (row, col).
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  // Coefficient access by linear index.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  // Packet access by (row, col).
  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  // Packet access by linear index.
  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }

  // (outer, inner) coordinates are mapped to (row, col) according to the
  // expression's storage order.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  // Underlying expression, used for the runtime alignment query.
  const XprType & nestedExpression() const { return m_xpr; }

protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};
393 
394 } // end namespace internal
395 
396 /***************************************************************************
397 * Part 4 : public API
398 ***************************************************************************/
399 
400 
/** \returns the result of reducing all coefficients of *this with the binary
  * functor \a func.
  *
  * The expression must be non-empty (asserted below). The actual strategy
  * (scalar vs. vectorized traversal, unrolled or not) is selected at compile
  * time by redux_traits via redux_impl's default template arguments.
  */
template<typename Derived>
template<typename Func>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  // An empty expression has no well-defined reduction value.
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  // Wrap *this in the evaluator adaptor expected by redux_impl.
  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}
420 
424 template<typename Derived>
425 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
427 {
428  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
429 }
430 
434 template<typename Derived>
435 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
437 {
438  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
439 }
440 
447 template<typename Derived>
448 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
450 {
451  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
452  return Scalar(0);
453  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
454 }
455 
460 template<typename Derived>
461 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
463 {
464 #ifdef __INTEL_COMPILER
465  #pragma warning push
466  #pragma warning ( disable : 2259 )
467 #endif
468  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
469 #ifdef __INTEL_COMPILER
470  #pragma warning pop
471 #endif
472 }
473 
481 template<typename Derived>
482 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
484 {
485  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
486  return Scalar(1);
487  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
488 }
489 
496 template<typename Derived>
497 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
499 {
500  return derived().diagonal().sum();
501 }
502 
503 } // end namespace Eigen
504 
505 #endif // EIGEN_REDUX_H
internal::traits< Derived >::Scalar Scalar
Definition: DenseBase.h:66
const int HugeCost
Definition: Constants.h:43
Scalar prod() const
Definition: Redux.h:483
const unsigned int DirectAccessBit
Definition: Constants.h:154
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:436
Definition: Constants.h:232
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:41
Scalar mean() const
Definition: Redux.h:462
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
Scalar sum() const
Definition: Redux.h:449
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:426
Scalar trace() const
Definition: Redux.h:498
const int Dynamic
Definition: Constants.h:21
const unsigned int ActualPacketAccessBit
Definition: Constants.h:104
const unsigned int LinearAccessBit
Definition: Constants.h:129