Eigen  3.3.90 (mercurial changeset af9e8864a4fc)
Redux.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 // * implement other kind of vectorization
20 // * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
// Computes, at compile time, the reduction strategy (traversal kind and
// unrolling decision) for applying the functor Func over all coefficients
// of the expression wrapped by Evaluator.
template<typename Func, typename Evaluator>
struct redux_traits
{
public:
  // Best SIMD packet type for this scalar type and (compile-time) expression size.
  typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Maximum number of coefficients along the inner (storage-contiguous) dimension.
    InnerMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxColsAtCompileTime
                 : Evaluator::MaxRowsAtCompileTime
  };

  enum {
    // Vectorization requires packet access on both the expression and the functor.
    MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    // Linear (1D-indexed) vectorization additionally needs linear coefficient access.
    MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
    // Otherwise vectorize inner slice by inner slice, but only when each slice can
    // hold at least 3 packets so the per-slice overhead is amortized.
    MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
                                       : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated total cost of the reduction; HugeCost for dynamic sizes, which
    // effectively disables unrolling below.
    Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
         : Evaluator::SizeAtCompileTime * Evaluator::CoeffReadCost + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Vectorized traversals get a limit scaled by the packet size.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decisions above to stderr; enabled only with EIGEN_DEBUG_ASSIGN.
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Evaluator::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};
83 
84 /***************************************************************************
85 * Part 2 : unrollers
86 ***************************************************************************/
87 
88 /*** no vectorization ***/
89 
90 template<typename Func, typename Evaluator, int Start, int Length>
91 struct redux_novec_unroller
92 {
93  enum {
94  HalfLength = Length/2
95  };
96 
97  typedef typename Evaluator::Scalar Scalar;
98 
99  EIGEN_DEVICE_FUNC
100  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
101  {
102  return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
103  redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
104  }
105 };
106 
107 template<typename Func, typename Evaluator, int Start>
108 struct redux_novec_unroller<Func, Evaluator, Start, 1>
109 {
110  enum {
111  outer = Start / Evaluator::InnerSizeAtCompileTime,
112  inner = Start % Evaluator::InnerSizeAtCompileTime
113  };
114 
115  typedef typename Evaluator::Scalar Scalar;
116 
117  EIGEN_DEVICE_FUNC
118  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
119  {
120  return eval.coeffByOuterInner(outer, inner);
121  }
122 };
123 
// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Evaluator, int Start>
struct redux_novec_unroller<Func, Evaluator, Start, 0>
{
  typedef typename Evaluator::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  // Returns a value-initialized Scalar; never reached at runtime (see above).
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
};
134 
135 /*** vectorization ***/
136 
137 template<typename Func, typename Evaluator, int Start, int Length>
138 struct redux_vec_unroller
139 {
140  enum {
141  PacketSize = redux_traits<Func, Evaluator>::PacketSize,
142  HalfLength = Length/2
143  };
144 
145  typedef typename Evaluator::Scalar Scalar;
146  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
147 
148  static EIGEN_STRONG_INLINE PacketScalar run(const Evaluator &eval, const Func& func)
149  {
150  return func.packetOp(
151  redux_vec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
152  redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func) );
153  }
154 };
155 
156 template<typename Func, typename Evaluator, int Start>
157 struct redux_vec_unroller<Func, Evaluator, Start, 1>
158 {
159  enum {
160  index = Start * redux_traits<Func, Evaluator>::PacketSize,
161  outer = index / int(Evaluator::InnerSizeAtCompileTime),
162  inner = index % int(Evaluator::InnerSizeAtCompileTime),
163  alignment = Evaluator::Alignment
164  };
165 
166  typedef typename Evaluator::Scalar Scalar;
167  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
168 
169  static EIGEN_STRONG_INLINE PacketScalar run(const Evaluator &eval, const Func&)
170  {
171  return eval.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
172  }
173 };
174 
175 /***************************************************************************
176 * Part 3 : implementation of all cases
177 ***************************************************************************/
178 
// Generic reduction implementation, specialized below on the traversal and
// unrolling strategies selected by redux_traits.
template<typename Func, typename Evaluator,
         int Traversal = redux_traits<Func, Evaluator>::Traversal,
         int Unrolling = redux_traits<Func, Evaluator>::Unrolling
>
struct redux_impl;
184 
185 template<typename Func, typename Evaluator>
186 struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
187 {
188  typedef typename Evaluator::Scalar Scalar;
189 
190  template<typename XprType>
191  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
192  Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
193  {
194  eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
195  Scalar res;
196  res = eval.coeffByOuterInner(0, 0);
197  for(Index i = 1; i < xpr.innerSize(); ++i)
198  res = func(res, eval.coeffByOuterInner(0, i));
199  for(Index i = 1; i < xpr.outerSize(); ++i)
200  for(Index j = 0; j < xpr.innerSize(); ++j)
201  res = func(res, eval.coeffByOuterInner(i, j));
202  return res;
203  }
204 };
205 
206 template<typename Func, typename Evaluator>
207 struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
208  : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
209 {
210  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
211  typedef typename Evaluator::Scalar Scalar;
212  template<typename XprType>
213  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
214  Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
215  {
216  return Base::run(eval,func);
217  }
218 };
219 
// Linear vectorized reduction: the expression is traversed through its 1D
// index. The range is split into an unaligned scalar head, an aligned packet
// body (partly unrolled by two), and a scalar tail.
template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename XprType>
  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    const Index size = xpr.size();

    const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // Packet-aligned loads are only possible with direct access and a
      // scalar-aligned packet type; otherwise fall back to unaligned loads.
      alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
    };
    // First index from which aligned packet loads are possible.
    const Index alignedStart = internal::first_default_aligned(xpr);
    // Sizes rounded down to multiples of 2 packets (main loop) and 1 packet.
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        // Two independent accumulators hide the latency of packetOp.
        PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        // At most one packet is left between the 2-packet and 1-packet bounds.
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      // Horizontal reduction of the packet accumulator to a scalar.
      res = func.predux(packet_res0);

      // Scalar head: coefficients before the first aligned packet.
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,eval.coeff(index));

      // Scalar tail: coefficients after the last full packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,eval.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = eval.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,eval.coeff(index));
    }

    return res;
  }
};
278 
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Slice-vectorized reduction: each inner slice is processed with unaligned
// packet loads up to the largest packet multiple, and the per-slice scalar
// remainders are folded in afterwards.
template<typename Func, typename Evaluator, int Unrolling>
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    const Index innerSize = xpr.innerSize();
    const Index outerSize = xpr.outerSize();
    enum {
      packetSize = redux_traits<Func, Evaluator>::PacketSize
    };
    // Inner size rounded down to a whole number of packets.
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      // Seed with the first packet of the first slice, then skip it below
      // (inner starts at packetSize when j==0).
      PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));

      // Horizontal reduction, then fold the scalar tail of every slice.
      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, eval.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
    }

    return res;
  }
};
318 
// Fully unrolled vectorized reduction: the packet part is handled by
// redux_vec_unroller and any scalar remainder by redux_novec_unroller,
// all resolved at compile time.
template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;

  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Evaluator>::PacketSize,
    Size = Evaluator::SizeAtCompileTime,
    // Number of coefficients covered by whole packets.
    VectorizedSize = (Size / PacketSize) * PacketSize
  };

  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
  {
    EIGEN_ONLY_USED_FOR_DEBUG(xpr)
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    // Both branches are compile-time constant; dead one is optimized away.
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::run(eval,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
      return res;
    }
    else {
      // Expression smaller than one packet: pure scalar unrolling.
      return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
    }
  }
};
348 
// evaluator adaptor
// Wraps a regular evaluator and exposes the compile-time constants and the
// (outer, inner)-indexed accessors that the redux machinery above expects.
template<typename _XprType>
class redux_evaluator : public internal::evaluator<_XprType>
{
  typedef internal::evaluator<_XprType> Base;
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = Base::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
  };

  // Maps (outer, inner) storage coordinates to (row, col) depending on the
  // expression's storage order, then forwards to the base evaluator.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  // Same coordinate mapping as above, for packet loads.
  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

};
381 
382 } // end namespace internal
383 
384 /***************************************************************************
385 * Part 4 : public API
386 ***************************************************************************/
387 
388 
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a Func must be a binary functor combining two Scalars
  * into one (e.g. a sum, min, or max functor).
  *
  * \warning the matrix must not be empty, otherwise an assertion is triggered.
  */
template<typename Derived>
template<typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  // The initial expression is passed to the reducer as an additional argument instead of
  // passing it as a member of redux_evaluator to help
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
}
410 
414 template<typename Derived>
415 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
417 {
418  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
419 }
420 
424 template<typename Derived>
425 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
427 {
428  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
429 }
430 
437 template<typename Derived>
438 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
440 {
441  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
442  return Scalar(0);
443  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
444 }
445 
450 template<typename Derived>
451 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
453 {
454 #ifdef __INTEL_COMPILER
455  #pragma warning push
456  #pragma warning ( disable : 2259 )
457 #endif
458  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
459 #ifdef __INTEL_COMPILER
460  #pragma warning pop
461 #endif
462 }
463 
471 template<typename Derived>
472 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
474 {
475  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
476  return Scalar(1);
477  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
478 }
479 
486 template<typename Derived>
487 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
489 {
490  return derived().diagonal().sum();
491 }
492 
493 } // end namespace Eigen
494 
495 #endif // EIGEN_REDUX_H
Scalar trace() const
Definition: Redux.h:488
Scalar sum() const
Definition: Redux.h:439
const int HugeCost
Definition: Constants.h:43
const unsigned int DirectAccessBit
Definition: Constants.h:154
Namespace containing all symbols from the Eigen library.
Definition: Core:117
Definition: Constants.h:232
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:41
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:42
Scalar prod() const
Definition: Redux.h:473
Definition: Eigen_Colamd.h:50
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:426
const int Dynamic
Definition: Constants.h:21
const unsigned int ActualPacketAccessBit
Definition: Constants.h:104
const unsigned int LinearAccessBit
Definition: Constants.h:129
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:416
Scalar mean() const
Definition: Redux.h:452