Eigen  3.4.90 (git rev a4098ac676528a83cfb73d4d26ce1b42ec05f47c)
Redux.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

// TODO
// * implement other kinds of vectorization
// * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

template<typename Func, typename Evaluator>
struct redux_traits
{
public:
  typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    InnerMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxColsAtCompileTime
                 : Evaluator::MaxRowsAtCompileTime,
    OuterMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxRowsAtCompileTime
                 : Evaluator::MaxColsAtCompileTime,
    SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
                        : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
                        : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
  };

  enum {
    MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
    MaySliceVectorize  = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
         : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost)
           + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Evaluator::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(OuterMaxSize)
    EIGEN_DEBUG_VAR(SliceVectorizedWork)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
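
// Worked illustration (added commentary, not part of the original source): consider a
// full sum over a Matrix4f. The evaluator reports SizeAtCompileTime==16 with linear and
// packet access, and scalar_sum_op advertises packet support, so MightVectorize and
// MayLinearVectorize hold. Assuming a 4-wide float packet (e.g. SSE) and the default
// EIGEN_UNROLLING_LIMIT, Cost = 16*1 + 15*1 = 31 <= UnrollingLimit, hence:
//
//   typedef internal::redux_evaluator<Matrix4f> Ev;
//   typedef internal::redux_traits<internal::scalar_sum_op<float,float>, Ev> Traits;
//   // expected: Traits::Traversal == LinearVectorizedTraversal
//   //           Traits::Unrolling == CompleteUnrolling
//
// The exact outcome depends on the SIMD target and tuning macros, so treat this only as
// a sketch of how the enums above combine.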

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

template<typename Func, typename Evaluator, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Evaluator::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
  {
    return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
                redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
  }
};

template<typename Func, typename Evaluator, int Start>
struct redux_novec_unroller<Func, Evaluator, Start, 1>
{
  enum {
    outer = Start / Evaluator::InnerSizeAtCompileTime,
    inner = Start % Evaluator::InnerSizeAtCompileTime
  };

  typedef typename Evaluator::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
  {
    return eval.coeffByOuterInner(outer, inner);
  }
};

// This is actually dead code and will never be called: for a 0-length reduction,
// run() is never invoked at all. The specialization is only required to prevent
// false warnings about failed inlining.
template<typename Func, typename Evaluator, int Start>
struct redux_novec_unroller<Func, Evaluator, Start, 0>
{
  typedef typename Evaluator::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
};
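
// Expansion sketch (added commentary, not part of the original source): for Start=0 and
// Length=4 the recursion above splits the range in halves, so the generated call tree is
// equivalent to
//
//   func( func(coeff(0), coeff(1)),
//         func(coeff(2), coeff(3)) )
//
// where coeff(i) stands for eval.coeffByOuterInner(i / InnerSize, i % InnerSize). The
// balanced tree keeps the dependency chain short compared to a plain left-to-right fold.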

/*** vectorization ***/

template<typename Func, typename Evaluator, int Start, int Length>
struct redux_vec_unroller
{
  template<typename PacketType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
  {
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      HalfLength = Length/2
    };

    return func.packetOp(
            redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
            redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
  }
};

template<typename Func, typename Evaluator, int Start>
struct redux_vec_unroller<Func, Evaluator, Start, 1>
{
  template<typename PacketType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
  {
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      index = Start * PacketSize,
      outer = index / int(Evaluator::InnerSizeAtCompileTime),
      inner = index % int(Evaluator::InnerSizeAtCompileTime),
      alignment = Evaluator::Alignment
    };
    return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
  }
};
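
// Analogous sketch for the vectorized unroller (added commentary): with Start=0 and
// Length=2 the expansion above is
//
//   func.packetOp( eval.packetByOuterInner(... index 0 ...),
//                  eval.packetByOuterInner(... index PacketSize ...) )
//
// i.e. one packet per unrolled step. The caller is expected to finish with func.predux()
// and, when the size is not a multiple of PacketSize, a scalar tail (see Part 3 below).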

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

template<typename Func, typename Evaluator,
         int Traversal = redux_traits<Func, Evaluator>::Traversal,
         int Unrolling = redux_traits<Func, Evaluator>::Unrolling
>
struct redux_impl;

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    Scalar res;
    res = eval.coeffByOuterInner(0, 0);
    for(Index i = 1; i < xpr.innerSize(); ++i)
      res = func(res, eval.coeffByOuterInner(0, i));
    for(Index i = 1; i < xpr.outerSize(); ++i)
      for(Index j = 0; j < xpr.innerSize(); ++j)
        res = func(res, eval.coeffByOuterInner(i, j));
    return res;
  }
};

template<typename Func, typename Evaluator>
struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
  : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
{
  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
  typedef typename Evaluator::Scalar Scalar;
  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
  {
    return Base::run(eval,func);
  }
};

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename XprType>
  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    const Index size = xpr.size();

    const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = plain_enum_max(alignment0, Evaluator::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(xpr);
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize  = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2  = alignedStart + alignedSize2;
    const Index alignedEnd   = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,eval.coeff(index));

      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,eval.coeff(index));
    }
    else // too small to vectorize anything;
         // since this is a dynamic-size expression, it is inefficient for such small sizes anyway, so don't try to optimize.
    {
      res = eval.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,eval.coeff(index));
    }

    return res;
  }
};
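
// Shape of the loop above, shown for a plain sum over a raw float array (a hedged,
// simplified sketch, not Eigen API; alignment peeling is omitted and a neutral element
// is assumed, whereas the real code seeds the accumulator with the first packet because
// a generic functor has no identity value):
//
//   float linear_sum_sketch(const float* data, Index n)
//   {
//     Index i = 0;
//     float acc0 = 0.f, acc1 = 0.f;        // two accumulators hide the op latency
//     for(; i + 8 <= n; i += 8)            // "packet" width 4, partly unrolled by 2
//     {
//       acc0 += data[i+0] + data[i+1] + data[i+2] + data[i+3];
//       acc1 += data[i+4] + data[i+5] + data[i+6] + data[i+7];
//     }
//     float res = acc0 + acc1;             // combine accumulators, like packetOp + predux
//     for(; i < n; ++i)                    // scalar tail, like the alignedEnd..size loop
//       res += data[i];
//     return res;
//   }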

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
template<typename Func, typename Evaluator, int Unrolling>
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    const Index innerSize = xpr.innerSize();
    const Index outerSize = xpr.outerSize();
    enum {
      packetSize = redux_traits<Func, Evaluator>::PacketSize
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, eval.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything;
         // since this is a dynamic-size expression, it is inefficient for such small sizes anyway, so don't try to optimize.
    {
      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
    }

    return res;
  }
};
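
// Concrete shape example (added commentary): reducing a 5x100 block of a column-major
// float matrix typically lacks linear access (the outer stride breaks it) but still has
// packet access down each column, which is exactly the slice-vectorized case. Assuming a
// 4-wide packet, innerSize==5 gives packetedInnerSize==4: each column contributes one
// packet op, and the 5th coefficient of every column is folded in by the scalar clean-up
// loop above.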

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;

  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
  enum {
    PacketSize = redux_traits<Func, Evaluator>::PacketSize,
    Size = Evaluator::SizeAtCompileTime,
    VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
  };

  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
  {
    EIGEN_ONLY_USED_FOR_DEBUG(xpr)
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
    }
  }
};
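
// Fixed-size example (added commentary): with a 4-wide packet, reducing a 7-element
// float vector gives Size==7 and VectorizedSize==4, so run() performs one unrolled
// packet reduction over coefficients 0..3, calls predux, and folds in coefficients 4..6
// through redux_novec_unroller. Whenever Size < PacketSize, VectorizedSize is 0 and the
// whole reduction falls back to the scalar unroller, which is what the
// VectorizedSize > 0 guard is for.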

// evaluator adaptor
template<typename XprType_>
class redux_evaluator : public internal::evaluator<XprType_>
{
  typedef internal::evaluator<XprType_> Base;
public:
  typedef XprType_ XprType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = Base::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

};
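
// Index-mapping example (added commentary): for a column-major expression the "outer"
// index is the column and the "inner" index is the row, so coeffByOuterInner(1, 2) reads
// the coefficient at row 2, column 1; for a row-major expression the same call reads row
// 1, column 2. This lets every redux_impl walk the data in storage order without caring
// about RowMajor/ColMajor itself.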

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


template<typename Derived>
template<typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  // The initial expression is passed to the reducer as an additional argument instead of
  // passing it as a member of redux_evaluator to help the compiler keep everything in registers.
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
}
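
// Usage sketch (added commentary, not part of the original source): redux() accepts any
// associative binary functor. A plain lambda also works, but it takes the scalar path
// because the generic functor_traits<> default reports PacketAccess==false:
//
//   Eigen::Matrix3d m = Eigen::Matrix3d::Random();
//   double s1 = m.sum();                                           // vectorizable functor
//   double s2 = m.redux([](double a, double b) { return a + b; }); // same value, scalar path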

template<typename Derived>
template<int NaNPropagation>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
}

template<typename Derived>
template<int NaNPropagation>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}
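
// Edge-case note (added commentary): unlike redux(), which asserts on an empty
// expression, sum() and prod() above explicitly return the neutral element of their
// operation:
//
//   Eigen::MatrixXd e(0, 0);
//   double s = e.sum();   // 0.0
//   double p = e.prod();  // 1.0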

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}
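
// Example (added commentary): trace() does not require a square matrix; it sums the main
// diagonal, whose length is min(rows, cols):
//
//   Eigen::Matrix<double, 2, 3> m;
//   m << 1, 2, 3,
//        4, 5, 6;
//   double t = m.trace();  // 1 + 5 == 6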

} // end namespace Eigen

#endif // EIGEN_REDUX_H