Please, help us to better know about our user community by answering the following short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
Eigen  3.4.99 (git rev 199c5f2b47eb1f8e5a2d20e60f07e97cd95a6ba6)
Redux.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 // * implement other kind of vectorization
20 // * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
// Compile-time selection of the reduction strategy (traversal kind and
// unrolling) for applying the binary functor Func over all coefficients
// exposed by Evaluator.
template<typename Func, typename Evaluator>
struct redux_traits
{
public:
  // Best SIMD packet type for this scalar type and compile-time size.
  typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Inner/outer dimensions expressed in storage order: "inner" is the
    // contiguous dimension (cols for row-major, rows for col-major).
    InnerMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxColsAtCompileTime
                 : Evaluator::MaxRowsAtCompileTime,
    OuterMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxRowsAtCompileTime
                 : Evaluator::MaxColsAtCompileTime,
    // Upper bound on the number of packet operations a per-inner-vector
    // ("slice") vectorization could perform: Dynamic when it cannot be
    // bounded at compile time, 0 when the inner dimension is known to be
    // too small to hold even one packet.
    SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
                        : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
                        : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
  };

  enum {
    // Vectorization requires packet access on both the expression and the functor.
    MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    // Linear vectorization additionally requires linear (1D) coefficient addressing.
    MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
    // Slice vectorization is only worthwhile if there is enough packet work
    // (at least 3 packet ops, or an unknown/Dynamic amount).
    MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
  };

public:
  enum {
    // Preference order: linear vectorized > slice vectorized > plain scalar traversal.
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
              : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated total cost of the reduction: coefficient reads plus
    // (size-1) functor applications; HugeCost when the size is dynamic
    // so that unrolling is disabled.
    Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
         : Evaluator::SizeAtCompileTime * Evaluator::CoeffReadCost + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Vectorized traversals get a proportionally larger unrolling budget.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decision variables above to stderr; only compiled when
  // EIGEN_DEBUG_ASSIGN is defined.
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Evaluator::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(OuterMaxSize)
    EIGEN_DEBUG_VAR(SliceVectorizedWork)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
91 
92 /***************************************************************************
93 * Part 2 : unrollers
94 ***************************************************************************/
95 
96 /*** no vectorization ***/
97 
98 template<typename Func, typename Evaluator, int Start, int Length>
99 struct redux_novec_unroller
100 {
101  enum {
102  HalfLength = Length/2
103  };
104 
105  typedef typename Evaluator::Scalar Scalar;
106 
107  EIGEN_DEVICE_FUNC
108  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
109  {
110  return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
111  redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
112  }
113 };
114 
115 template<typename Func, typename Evaluator, int Start>
116 struct redux_novec_unroller<Func, Evaluator, Start, 1>
117 {
118  enum {
119  outer = Start / Evaluator::InnerSizeAtCompileTime,
120  inner = Start % Evaluator::InnerSizeAtCompileTime
121  };
122 
123  typedef typename Evaluator::Scalar Scalar;
124 
125  EIGEN_DEVICE_FUNC
126  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
127  {
128  return eval.coeffByOuterInner(outer, inner);
129  }
130 };
131 
132 // This is actually dead code and will never be called. It is required
133 // to prevent false warnings regarding failed inlining though
134 // for 0 length run() will never be called at all.
135 template<typename Func, typename Evaluator, int Start>
136 struct redux_novec_unroller<Func, Evaluator, Start, 0>
137 {
138  typedef typename Evaluator::Scalar Scalar;
139  EIGEN_DEVICE_FUNC
140  static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
141 };
142 
143 /*** vectorization ***/
144 
145 template<typename Func, typename Evaluator, int Start, int Length>
146 struct redux_vec_unroller
147 {
148  template<typename PacketType>
149  EIGEN_DEVICE_FUNC
150  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
151  {
152  enum {
153  PacketSize = unpacket_traits<PacketType>::size,
154  HalfLength = Length/2
155  };
156 
157  return func.packetOp(
158  redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
159  redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
160  }
161 };
162 
163 template<typename Func, typename Evaluator, int Start>
164 struct redux_vec_unroller<Func, Evaluator, Start, 1>
165 {
166  template<typename PacketType>
167  EIGEN_DEVICE_FUNC
168  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
169  {
170  enum {
171  PacketSize = unpacket_traits<PacketType>::size,
172  index = Start * PacketSize,
173  outer = index / int(Evaluator::InnerSizeAtCompileTime),
174  inner = index % int(Evaluator::InnerSizeAtCompileTime),
175  alignment = Evaluator::Alignment
176  };
177  return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
178  }
179 };
180 
181 /***************************************************************************
182 * Part 3 : implementation of all cases
183 ***************************************************************************/
184 
// Primary declaration of the reduction dispatcher. The defaulted Traversal
// and Unrolling parameters are computed by redux_traits; the partial
// specializations below implement each (Traversal, Unrolling) combination.
template<typename Func, typename Evaluator,
         int Traversal = redux_traits<Func, Evaluator>::Traversal,
         int Unrolling = redux_traits<Func, Evaluator>::Unrolling
>
struct redux_impl;
190 
191 template<typename Func, typename Evaluator>
192 struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
193 {
194  typedef typename Evaluator::Scalar Scalar;
195 
196  template<typename XprType>
197  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
198  Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
199  {
200  eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
201  Scalar res;
202  res = eval.coeffByOuterInner(0, 0);
203  for(Index i = 1; i < xpr.innerSize(); ++i)
204  res = func(res, eval.coeffByOuterInner(0, i));
205  for(Index i = 1; i < xpr.outerSize(); ++i)
206  for(Index j = 0; j < xpr.innerSize(); ++j)
207  res = func(res, eval.coeffByOuterInner(i, j));
208  return res;
209  }
210 };
211 
212 template<typename Func, typename Evaluator>
213 struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
214  : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
215 {
216  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
217  typedef typename Evaluator::Scalar Scalar;
218  template<typename XprType>
219  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
220  Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
221  {
222  return Base::run(eval,func);
223  }
224 };
225 
// Linear vectorized reduction over a 1D-addressable expression: scalar
// prologue up to the first aligned coefficient, a 2x-unrolled packet loop,
// packet epilogue, then a scalar tail.
template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename XprType>
  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    const Index size = xpr.size();

    const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // Aligned loads are only possible with direct, scalar-aligned storage;
      // otherwise fall back to the evaluator's statically known alignment.
      alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
    };
    // Index of the first coefficient from which packet loads are aligned.
    const Index alignedStart = internal::first_default_aligned(xpr);
    // alignedSize2: coefficients covered by the 2x-unrolled packet loop;
    // alignedSize: coefficients covered by packets overall (may include one
    // extra single packet beyond alignedSize2).
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        // Two independent accumulators hide the functor's latency.
        PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        // Fold in the single leftover packet, if any, between alignedEnd2 and alignedEnd.
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      // Horizontal reduction of the packet accumulator to a scalar.
      res = func.predux(packet_res0);

      // Scalar prologue: coefficients before the first aligned position.
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,eval.coeff(index));

      // Scalar epilogue: coefficients after the last full packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,eval.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = eval.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,eval.coeff(index));
    }

    return res;
  }
};
284 
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Slice vectorization: each inner vector is reduced with packets up to the
// largest multiple of the packet size; all scalar leftovers are folded in
// afterwards.
template<typename Func, typename Evaluator, int Unrolling>
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    const Index innerSize = xpr.innerSize();
    const Index outerSize = xpr.outerSize();
    enum {
      packetSize = redux_traits<Func, Evaluator>::PacketSize
    };
    // Number of coefficients per inner vector that full packets can cover.
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      // Seed the accumulator with the first packet; the inner loop starts
      // at packetSize for j==0 so that packet is not counted twice.
      PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));

      // Horizontal reduction, then fold in the scalar tail of every inner vector.
      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, eval.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
    }

    return res;
  }
};
324 
325 template<typename Func, typename Evaluator>
326 struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
327 {
328  typedef typename Evaluator::Scalar Scalar;
329 
330  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
331  enum {
332  PacketSize = redux_traits<Func, Evaluator>::PacketSize,
333  Size = Evaluator::SizeAtCompileTime,
334  VectorizedSize = (Size / PacketSize) * PacketSize
335  };
336 
337  template<typename XprType>
338  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
339  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
340  {
341  EIGEN_ONLY_USED_FOR_DEBUG(xpr)
342  eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
343  if (VectorizedSize > 0) {
344  Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
345  if (VectorizedSize != Size)
346  res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
347  return res;
348  }
349  else {
350  return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
351  }
352  }
353 };
354 
355 // evaluator adaptor
// evaluator adaptor
// Thin wrapper around internal::evaluator exposing the compile-time
// constants and the (outer, inner) coefficient/packet accessors that the
// redux_impl specializations above rely on.
template<typename _XprType>
class redux_evaluator : public internal::evaluator<_XprType>
{
  typedef internal::evaluator<_XprType> Base;
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = Base::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
  };

  // Coefficient access by (outer, inner) storage-order coordinates; maps
  // to (row, col) according to IsRowMajor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  // Packet access by (outer, inner) storage-order coordinates.
  template<int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

};
389 
390 } // end namespace internal
391 
392 /***************************************************************************
393 * Part 4 : public API
394 ***************************************************************************/
395 
396 
406 template<typename Derived>
407 template<typename Func>
408 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
409 DenseBase<Derived>::redux(const Func& func) const
410 {
411  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
412 
413  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
414  ThisEvaluator thisEval(derived());
415 
416  // The initial expression is passed to the reducer as an additional argument instead of
417  // passing it as a member of redux_evaluator to help
418  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
419 }
420 
428 template<typename Derived>
429 template<int NaNPropagation>
430 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
432 {
433  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
434 }
435 
443 template<typename Derived>
444 template<int NaNPropagation>
445 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
447 {
448  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
449 }
450 
457 template<typename Derived>
458 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
460 {
461  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
462  return Scalar(0);
463  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
464 }
465 
470 template<typename Derived>
471 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
473 {
474 #ifdef __INTEL_COMPILER
475  #pragma warning push
476  #pragma warning ( disable : 2259 )
477 #endif
478  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
479 #ifdef __INTEL_COMPILER
480  #pragma warning pop
481 #endif
482 }
483 
491 template<typename Derived>
492 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
494 {
495  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
496  return Scalar(1);
497  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
498 }
499 
506 template<typename Derived>
507 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
509 {
510  return derived().diagonal().sum();
511 }
512 
513 } // end namespace Eigen
514 
515 #endif // EIGEN_REDUX_H
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:47
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:431
Scalar mean() const
Definition: Redux.h:472
internal::traits< Derived >::Scalar Scalar
Definition: DenseBase.h:66
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:446
Scalar sum() const
Definition: Redux.h:459
Scalar prod() const
Definition: Redux.h:493
Scalar trace() const
Definition: Redux.h:508
@ Unaligned
Definition: Constants.h:233
const unsigned int ActualPacketAccessBit
Definition: Constants.h:105
const unsigned int LinearAccessBit
Definition: Constants.h:130
const unsigned int DirectAccessBit
Definition: Constants.h:155
Namespace containing all symbols from the Eigen library.
Definition: Core:134
const int HugeCost
Definition: Constants.h:44
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
const int Dynamic
Definition: Constants.h:22