Eigen  3.4.90 (git rev 67eeba6e720c5745abc77ae6c92ce0a44aa7b7ae)
Redux.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 // TODO
21 // * implement other kind of vectorization
22 // * factorize code
23 
24 /***************************************************************************
25 * Part 1 : the logic deciding a strategy for vectorization and unrolling
26 ***************************************************************************/
27 
// Compile-time policy for a reduction: given the functor Func and the
// expression evaluator, decide which traversal strategy to use
// (default scalar / linear vectorized / slice vectorized) and whether
// the reduction loop should be fully unrolled.
28 template<typename Func, typename Evaluator>
29 struct redux_traits
30 {
31 public:
  // Best SIMD packet type for this scalar type and (compile-time) size.
32  typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
33  enum {
34  PacketSize = unpacket_traits<PacketType>::size,
  // Inner/outer max sizes expressed storage-order-independently:
  // "inner" is the contiguous direction of the evaluator.
35  InnerMaxSize = int(Evaluator::IsRowMajor)
36  ? Evaluator::MaxColsAtCompileTime
37  : Evaluator::MaxRowsAtCompileTime,
38  OuterMaxSize = int(Evaluator::IsRowMajor)
39  ? Evaluator::MaxRowsAtCompileTime
40  : Evaluator::MaxColsAtCompileTime,
  // Estimated number of packet operations a slice-vectorized traversal
  // would perform; Dynamic if it cannot be bounded at compile time,
  // 0 if the inner dimension is provably too small for even one packet.
41  SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
42  : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
43  : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
44  };
45 
46  enum {
  // Vectorization requires both packet access on the expression and
  // a functor with a packetOp implementation.
47  MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
48  && (functor_traits<Func>::PacketAccess),
49  MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
  // Slice vectorization is only worthwhile if enough packet work exists
  // (>=3 packet ops, or unbounded/Dynamic).
50  MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
51  };
52 
53 public:
54  enum {
  // Preference order: linear vectorized > slice vectorized > default scalar.
55  Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
56  : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
57  : int(DefaultTraversal)
58  };
59 
60 public:
61  enum {
  // Total compile-time cost estimate: N coefficient reads plus N-1
  // applications of the functor; HugeCost disables unrolling for
  // dynamic sizes.
62  Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
63  : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
  // Vectorized traversals get a PacketSize-times larger unrolling budget.
64  UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
65  };
66 
67 public:
68  enum {
69  Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
70  };
71 
72 #ifdef EIGEN_DEBUG_ASSIGN
  // Prints the decisions above; only compiled when EIGEN_DEBUG_ASSIGN is set.
73  static void debug()
74  {
75  std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
76  std::cerr.setf(std::ios::hex, std::ios::basefield);
77  EIGEN_DEBUG_VAR(Evaluator::Flags)
78  std::cerr.unsetf(std::ios::hex);
79  EIGEN_DEBUG_VAR(InnerMaxSize)
80  EIGEN_DEBUG_VAR(OuterMaxSize)
81  EIGEN_DEBUG_VAR(SliceVectorizedWork)
82  EIGEN_DEBUG_VAR(PacketSize)
83  EIGEN_DEBUG_VAR(MightVectorize)
84  EIGEN_DEBUG_VAR(MayLinearVectorize)
85  EIGEN_DEBUG_VAR(MaySliceVectorize)
86  std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
87  EIGEN_DEBUG_VAR(UnrollingLimit)
88  std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
89  std::cerr << std::endl;
90  }
91 #endif
92 };
93 
94 /***************************************************************************
95 * Part 2 : unrollers
96 ***************************************************************************/
97 
98 /*** no vectorization ***/
99 
// Scalar (non-vectorized) compile-time unroller: reduces the Length
// coefficients starting at linear index Start by recursively splitting
// the range in half, producing a balanced reduction tree rather than a
// left-to-right chain (shorter dependency chains for the optimizer).
100 template<typename Func, typename Evaluator, int Start, int Length>
101 struct redux_novec_unroller
102 {
103  enum {
104  HalfLength = Length/2
105  };
106 
107  typedef typename Evaluator::Scalar Scalar;
108 
109  EIGEN_DEVICE_FUNC
  // Combines the reduction of [Start, Start+HalfLength) with that of
  // [Start+HalfLength, Start+Length) through the functor.
110  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
111  {
112  return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
113  redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
114  }
115 };
116 
// Recursion base case (Length == 1): read a single coefficient. The
// linear index Start is decomposed into (outer, inner) coordinates
// using the compile-time inner size.
117 template<typename Func, typename Evaluator, int Start>
118 struct redux_novec_unroller<Func, Evaluator, Start, 1>
119 {
120  enum {
121  outer = Start / Evaluator::InnerSizeAtCompileTime,
122  inner = Start % Evaluator::InnerSizeAtCompileTime
123  };
124 
125  typedef typename Evaluator::Scalar Scalar;
126 
127  EIGEN_DEVICE_FUNC
  // The functor is unused here: a single coefficient needs no combining.
128  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
129  {
130  return eval.coeffByOuterInner(outer, inner);
131  }
132 };
133 
134 // This is actually dead code and will never be called. It is required
135 // to prevent false warnings regarding failed inlining though
136 // for 0 length run() will never be called at all.
137 template<typename Func, typename Evaluator, int Start>
138 struct redux_novec_unroller<Func, Evaluator, Start, 0>
139 {
140  typedef typename Evaluator::Scalar Scalar;
141  EIGEN_DEVICE_FUNC
  // Returns a value-initialized Scalar; never reached at runtime.
142  static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
143 };
144 
145 /*** vectorization ***/
146 
// Vectorized compile-time unroller: same halving scheme as
// redux_novec_unroller, but Start and Length are counted in packets,
// and partial results are combined with the functor's packetOp.
147 template<typename Func, typename Evaluator, int Start, int Length>
148 struct redux_vec_unroller
149 {
  // PacketType is a template parameter of run() (not of the class) so the
  // same unroller can be instantiated with different packet widths.
150  template<typename PacketType>
151  EIGEN_DEVICE_FUNC
152  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
153  {
154  enum {
155  PacketSize = unpacket_traits<PacketType>::size,
156  HalfLength = Length/2
157  };
158 
159  return func.packetOp(
160  redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
161  redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
162  }
163 };
164 
// Base case (one packet): load the packet at packet-index Start. The
// scalar index Start*PacketSize is decomposed into (outer, inner)
// coordinates, and the load uses the evaluator's static alignment.
165 template<typename Func, typename Evaluator, int Start>
166 struct redux_vec_unroller<Func, Evaluator, Start, 1>
167 {
168  template<typename PacketType>
169  EIGEN_DEVICE_FUNC
170  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
171  {
172  enum {
173  PacketSize = unpacket_traits<PacketType>::size,
174  index = Start * PacketSize,
175  outer = index / int(Evaluator::InnerSizeAtCompileTime),
176  inner = index % int(Evaluator::InnerSizeAtCompileTime),
177  alignment = Evaluator::Alignment
178  };
179  return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
180  }
181 };
182 
183 /***************************************************************************
184 * Part 3 : implementation of all cases
185 ***************************************************************************/
186 
// Primary template, specialized below for each (Traversal, Unrolling)
// combination selected by redux_traits.
187 template<typename Func, typename Evaluator,
188  int Traversal = redux_traits<Func, Evaluator>::Traversal,
189  int Unrolling = redux_traits<Func, Evaluator>::Unrolling
190 >
191 struct redux_impl;
192 
// Scalar traversal, no unrolling: plain runtime double loop over
// (outer, inner) coordinates, seeded with coefficient (0,0).
193 template<typename Func, typename Evaluator>
194 struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
195 {
196  typedef typename Evaluator::Scalar Scalar;
197 
  // xpr is the original expression; it is only used here for its
  // runtime sizes (the coefficients come from the evaluator).
198  template<typename XprType>
199  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
200  Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
201  {
202  eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
203  Scalar res;
  // Seed with the first coefficient, then finish the first inner run...
204  res = eval.coeffByOuterInner(0, 0);
205  for(Index i = 1; i < xpr.innerSize(); ++i)
206  res = func(res, eval.coeffByOuterInner(0, i));
  // ...and fold in all remaining outer slices.
207  for(Index i = 1; i < xpr.outerSize(); ++i)
208  for(Index j = 0; j < xpr.innerSize(); ++j)
209  res = func(res, eval.coeffByOuterInner(i, j));
210  return res;
211  }
212 };
213 
// Scalar traversal, complete unrolling: delegate the whole fixed-size
// reduction to the recursive redux_novec_unroller; xpr is not needed.
214 template<typename Func, typename Evaluator>
215 struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
216  : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
217 {
218  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
219  typedef typename Evaluator::Scalar Scalar;
220  template<typename XprType>
221  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
222  Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
223  {
224  return Base::run(eval,func);
225  }
226 };
227 
// Linear vectorized traversal, no unrolling: treat the expression as a
// 1D sequence, process the aligned middle with packets (two accumulators
// to relax the dependency chain), and handle the unaligned head/tail
// with scalar operations.
228 template<typename Func, typename Evaluator>
229 struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
230 {
231  typedef typename Evaluator::Scalar Scalar;
232  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
233 
234  template<typename XprType>
235  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
236  {
237  const Index size = xpr.size();
238 
239  const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
240  const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
241  enum {
  // Direct-access expressions can reach packet alignment at runtime;
  // otherwise fall back to the evaluator's static alignment guarantee.
242  alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
243  alignment = plain_enum_max(alignment0, Evaluator::Alignment)
244  };
  // [0, alignedStart) and [alignedEnd, size) are the scalar head/tail;
  // [alignedStart, alignedEnd2) is processed two packets per iteration.
245  const Index alignedStart = internal::first_default_aligned(xpr);
246  const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
247  const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
248  const Index alignedEnd2 = alignedStart + alignedSize2;
249  const Index alignedEnd = alignedStart + alignedSize;
250  Scalar res;
251  if(alignedSize)
252  {
253  PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
254  if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
255  {
256  PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
257  for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
258  {
259  packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
260  packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
261  }
262 
  // Merge the two accumulators, then fold in the possible odd
  // trailing packet left between alignedEnd2 and alignedEnd.
263  packet_res0 = func.packetOp(packet_res0,packet_res1);
264  if(alignedEnd>alignedEnd2)
265  packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
266  }
  // Horizontal reduction of the packet accumulator to a scalar.
267  res = func.predux(packet_res0);
268 
269  for(Index index = 0; index < alignedStart; ++index)
270  res = func(res,eval.coeff(index));
271 
272  for(Index index = alignedEnd; index < size; ++index)
273  res = func(res,eval.coeff(index));
274  }
275  else // too small to vectorize anything.
276  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
277  {
278  res = eval.coeff(0);
279  for(Index index = 1; index < size; ++index)
280  res = func(res,eval.coeff(index));
281  }
282 
283  return res;
284  }
285 };
286 
287 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Slice vectorization: vectorize each inner slice independently with
// unaligned packet loads, then mop up the scalar remainder of every
// slice; used when linear access is not available.
288 template<typename Func, typename Evaluator, int Unrolling>
289 struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
290 {
291  typedef typename Evaluator::Scalar Scalar;
292  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
293 
294  template<typename XprType>
295  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
296  {
297  eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
298  const Index innerSize = xpr.innerSize();
299  const Index outerSize = xpr.outerSize();
300  enum {
301  packetSize = redux_traits<Func, Evaluator>::PacketSize
302  };
  // Largest packet-aligned prefix of each inner slice.
303  const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
304  Scalar res;
305  if(packetedInnerSize)
306  {
  // Seed with the first packet; the loop below skips it (i starts at
  // packetSize when j==0).
307  PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
308  for(Index j=0; j<outerSize; ++j)
309  for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
310  packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));
311 
312  res = func.predux(packet_res);
  // Scalar remainder at the end of every inner slice.
313  for(Index j=0; j<outerSize; ++j)
314  for(Index i=packetedInnerSize; i<innerSize; ++i)
315  res = func(res, eval.coeffByOuterInner(j,i));
316  }
317  else // too small to vectorize anything.
318  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
319  {
320  res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
321  }
322 
323  return res;
324  }
325 };
326 
// Linear vectorized traversal with complete unrolling: all packet loads
// and scalar leftovers are generated at compile time by the unrollers.
327 template<typename Func, typename Evaluator>
328 struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
329 {
330  typedef typename Evaluator::Scalar Scalar;
331 
332  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
333  enum {
334  PacketSize = redux_traits<Func, Evaluator>::PacketSize,
335  Size = Evaluator::SizeAtCompileTime,
  // Number of coefficients covered by whole packets; the rest is scalar.
336  VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
337  };
338 
339  template<typename XprType>
340  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
341  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
342  {
343  EIGEN_ONLY_USED_FOR_DEBUG(xpr)
344  eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
  // Compile-time branch: both sides are instantiated, but the condition
  // is a constant so the dead one is optimized away.
345  if (VectorizedSize > 0) {
346  Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
347  if (VectorizedSize != Size)
348  res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
349  return res;
350  }
351  else {
352  return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
353  }
354  }
355 };
356 
357 // evaluator adaptor
358 template<typename XprType_>
359 class redux_evaluator : public internal::evaluator<XprType_>
360 {
361  typedef internal::evaluator<XprType_> Base;
362 public:
363  typedef XprType_ XprType;
364  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
365  explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
366 
367  typedef typename XprType::Scalar Scalar;
368  typedef typename XprType::CoeffReturnType CoeffReturnType;
369  typedef typename XprType::PacketScalar PacketScalar;
370 
371  enum {
372  MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
373  MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
374  // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
375  Flags = Base::Flags & ~DirectAccessBit,
376  IsRowMajor = XprType::IsRowMajor,
377  SizeAtCompileTime = XprType::SizeAtCompileTime,
378  InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
379  };
380 
381  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
382  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
383  { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
384 
385  template<int LoadMode, typename PacketType>
386  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
387  PacketType packetByOuterInner(Index outer, Index inner) const
388  { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
389 
390 };
391 
392 } // end namespace internal
393 
394 /***************************************************************************
395 * Part 4 : public API
396 ***************************************************************************/
397 
398 
// Generic reduction entry point: applies the binary functor func over
// all coefficients of *this and returns the accumulated scalar. The
// strategy (vectorization/unrolling) is chosen by redux_traits via
// redux_impl. Asserts on empty matrices.
408 template<typename Derived>
409 template<typename Func>
410 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
411 DenseBase<Derived>::redux(const Func& func) const
412 {
413  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
414 
415  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
416  ThisEvaluator thisEval(derived());
417 
418  // The initial expression is passed to the reducer as an additional argument instead of
419  // passing it as a member of redux_evaluator to help
420  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
421 }
422 
// \returns the minimum of all coefficients of *this; NaN handling is
// selected by the NaNPropagation template parameter.
// NOTE(review): the signature line (original line 433) was lost during
// extraction; restored per the index entry "minCoeff ... Redux.h:433".
430 template<typename Derived>
431 template<int NaNPropagation>
432 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
433 DenseBase<Derived>::minCoeff() const
434 {
435  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
436 }
437 
// \returns the maximum of all coefficients of *this; NaN handling is
// selected by the NaNPropagation template parameter.
// NOTE(review): the signature line (original line 448) was lost during
// extraction; restored per the index entry "maxCoeff ... Redux.h:448".
445 template<typename Derived>
446 template<int NaNPropagation>
447 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
448 DenseBase<Derived>::maxCoeff() const
449 {
450  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
451 }
452 
// \returns the sum of all coefficients of *this; returns Scalar(0) for
// an empty matrix (the only reduction with a meaningful identity here).
// NOTE(review): the signature line (original line 461) was lost during
// extraction; restored per the index entry "sum ... Redux.h:461".
459 template<typename Derived>
460 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
461 DenseBase<Derived>::sum() const
462 {
463  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
464  return Scalar(0);
465  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
466 }
467 
// \returns the arithmetic mean of all coefficients (sum / size). The
// ICC pragma silences a spurious conversion warning (#2259).
// NOTE(review): the signature line (original line 474) was lost during
// extraction; restored per the index entry "mean ... Redux.h:474".
472 template<typename Derived>
473 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
474 DenseBase<Derived>::mean() const
475 {
476 #ifdef __INTEL_COMPILER
477  #pragma warning push
478  #pragma warning ( disable : 2259 )
479 #endif
480  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
481 #ifdef __INTEL_COMPILER
482  #pragma warning pop
483 #endif
484 }
485 
// \returns the product of all coefficients of *this; returns Scalar(1)
// (the multiplicative identity) for an empty matrix.
// NOTE(review): the signature line (original line 495) was lost during
// extraction; restored per the index entry "prod ... Redux.h:495".
493 template<typename Derived>
494 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
495 DenseBase<Derived>::prod() const
496 {
497  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
498  return Scalar(1);
499  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
500 }
501 
// \returns the trace of *this, i.e. the sum of the diagonal coefficients;
// defined on MatrixBase (matrices only, not arrays).
// NOTE(review): the signature line (original line 510) was lost during
// extraction; restored per the index entry "trace ... Redux.h:510" —
// confirm the class is MatrixBase against the upstream header.
508 template<typename Derived>
509 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
510 MatrixBase<Derived>::trace() const
511 {
512  return derived().diagonal().sum();
513 }
514 
515 } // end namespace Eigen
516 
517 #endif // EIGEN_REDUX_H
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:42
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:433
Scalar mean() const
Definition: Redux.h:474
internal::traits< Derived >::Scalar Scalar
Definition: DenseBase.h:61
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:448
Scalar sum() const
Definition: Redux.h:461
Scalar prod() const
Definition: Redux.h:495
Scalar trace() const
Definition: Redux.h:510
@ Unaligned
Definition: Constants.h:235
const unsigned int ActualPacketAccessBit
Definition: Constants.h:107
const unsigned int LinearAccessBit
Definition: Constants.h:132
const unsigned int DirectAccessBit
Definition: Constants.h:157
Namespace containing all symbols from the Eigen library.
Definition: Core:139
const int HugeCost
Definition: Constants.h:46
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:59
const int Dynamic
Definition: Constants.h:24