Eigen-unsupported  3.3.90 (mercurial changeset edbe6cfd4430)
TensorMorphing.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
12 
13 namespace Eigen {
14 
22 namespace internal {
23 template<typename NewDimensions, typename XprType>
24 struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
25 {
26  typedef typename XprType::Scalar Scalar;
27  typedef traits<XprType> XprTraits;
28  typedef typename XprTraits::StorageKind StorageKind;
29  typedef typename XprTraits::Index Index;
30  typedef typename XprType::Nested Nested;
31  typedef typename remove_reference<Nested>::type _Nested;
32  static const int NumDimensions = array_size<NewDimensions>::value;
33  static const int Layout = XprTraits::Layout;
34  typedef typename XprTraits::PointerType PointerType;
35 };
36 
37 template<typename NewDimensions, typename XprType>
38 struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
39 {
40  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
41 };
42 
43 template<typename NewDimensions, typename XprType>
44 struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
45 {
46  typedef TensorReshapingOp<NewDimensions, XprType> type;
47 };
48 
49 } // end namespace internal
50 
51 
52 
53 template<typename NewDimensions, typename XprType>
54 class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
55 {
56  public:
57  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
58  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
59  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
60  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
61  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
62 
63  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
64  : m_xpr(expr), m_dims(dims) {}
65 
66  EIGEN_DEVICE_FUNC
67  const NewDimensions& dimensions() const { return m_dims; }
68 
69  EIGEN_DEVICE_FUNC
70  const typename internal::remove_all<typename XprType::Nested>::type&
71  expression() const { return m_xpr; }
72 
73  EIGEN_DEVICE_FUNC
74  EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
75  {
76  typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
77  Assign assign(*this, other);
78  internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
79  return *this;
80  }
81 
82  template<typename OtherDerived>
83  EIGEN_DEVICE_FUNC
84  EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
85  {
86  typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
87  Assign assign(*this, other);
88  internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
89  return *this;
90  }
91 
92  protected:
93  typename XprType::Nested m_xpr;
94  const NewDimensions m_dims;
95 };
96 
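In practice this expression is not constructed directly: it is returned by TensorBase::reshape(). A minimal usage sketch (dimension values are arbitrary; the only requirement, checked by the evaluator below, is that the total number of coefficients is preserved):

#include <unsupported/Eigen/CXX11/Tensor>

void reshape_example() {
  Eigen::Tensor<float, 2> input(4, 3);
  input.setRandom();
  Eigen::array<Eigen::Index, 3> new_dims{{2, 2, 3}};
  // Same 12 coefficients viewed through three dimensions; no data is moved.
  Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);
}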
97 
98 // Eval as rvalue
99 template<typename NewDimensions, typename ArgType, typename Device>
100 struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
101 {
102  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
103  typedef NewDimensions Dimensions;
104 
105  enum {
106  IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
107  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
108  Layout = TensorEvaluator<ArgType, Device>::Layout,
109  CoordAccess = false, // to be implemented
110  RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
111  };
112 
113  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
114  : m_impl(op.expression(), device), m_dimensions(op.dimensions())
115  {
116  // The total size of the reshaped tensor must be equal to the total size
117  // of the input tensor.
118  eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
119  }
120 
121  typedef typename XprType::Index Index;
122  typedef typename XprType::Scalar Scalar;
123  typedef typename XprType::CoeffReturnType CoeffReturnType;
124  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
125 
126  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
127 
128  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
129  return m_impl.evalSubExprsIfNeeded(data);
130  }
131  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
132  m_impl.cleanup();
133  }
134 
135  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
136  {
137  return m_impl.coeff(index);
138  }
139 
140  template<int LoadMode>
141  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
142  {
143  return m_impl.template packet<LoadMode>(index);
144  }
145 
146  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
147  return m_impl.costPerCoeff(vectorized);
148  }
149 
150  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return const_cast<Scalar*>(m_impl.data()); }
151 
152  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
153 
154  protected:
155  TensorEvaluator<ArgType, Device> m_impl;
156  NewDimensions m_dimensions;
157 };
158 
159 
160 // Eval as lvalue
161 template<typename NewDimensions, typename ArgType, typename Device>
162  struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
163  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
164 
165 {
166  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
167  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
168  typedef NewDimensions Dimensions;
169 
170  enum {
171  IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
172  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
173  Layout = TensorEvaluator<ArgType, Device>::Layout,
174  CoordAccess = false, // to be implemented
175  RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
176  };
177 
178  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
179  : Base(op, device)
180  { }
181 
182  typedef typename XprType::Index Index;
183  typedef typename XprType::Scalar Scalar;
184  typedef typename XprType::CoeffReturnType CoeffReturnType;
185  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
186 
187  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
188  {
189  return this->m_impl.coeffRef(index);
190  }
191  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
192  void writePacket(Index index, const PacketReturnType& x)
193  {
194  this->m_impl.template writePacket<StoreMode>(index, x);
195  }
196 };
197 
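Because TensorReshapingOp is declared with WriteAccessors and the lvalue evaluator above forwards coeffRef() and writePacket() straight to its argument, a reshape can also appear on the left-hand side of an assignment. A short sketch (sizes are arbitrary):

#include <unsupported/Eigen/CXX11/Tensor>

void reshape_assign_example() {
  Eigen::Tensor<float, 3> dst(2, 2, 3);
  Eigen::Tensor<float, 2> src(4, 3);
  src.setRandom();
  Eigen::array<Eigen::Index, 2> flat_dims{{4, 3}};
  // Writes src into dst's storage through the reshaped 4x3 view of dst.
  dst.reshape(flat_dims) = src;
}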
198 
206 namespace internal {
207 template<typename StartIndices, typename Sizes, typename XprType>
208 struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
209 {
210  typedef typename XprType::Scalar Scalar;
211  typedef traits<XprType> XprTraits;
212  typedef typename XprTraits::StorageKind StorageKind;
213  typedef typename XprTraits::Index Index;
214  typedef typename XprType::Nested Nested;
215  typedef typename remove_reference<Nested>::type _Nested;
216  static const int NumDimensions = array_size<StartIndices>::value;
217  static const int Layout = XprTraits::Layout;
218  typedef typename XprTraits::PointerType PointerType;
219 };
220 
221 template<typename StartIndices, typename Sizes, typename XprType>
222 struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
223 {
224  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
225 };
226 
227 template<typename StartIndices, typename Sizes, typename XprType>
228 struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
229 {
230  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
231 };
232 
233 } // end namespace internal
234 
235 
236 
237 template<typename StartIndices, typename Sizes, typename XprType>
238 class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
239 {
240  public:
241  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
242  typedef typename XprType::CoeffReturnType CoeffReturnType;
243  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
244  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
245  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
246 
247  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
248  : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
249 
250  EIGEN_DEVICE_FUNC
251  const StartIndices& startIndices() const { return m_indices; }
252  EIGEN_DEVICE_FUNC
253  const Sizes& sizes() const { return m_sizes; }
254 
255  EIGEN_DEVICE_FUNC
256  const typename internal::remove_all<typename XprType::Nested>::type&
257  expression() const { return m_xpr; }
258 
259  template<typename OtherDerived>
260  EIGEN_DEVICE_FUNC
261  EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
262  {
263  typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
264  Assign assign(*this, other);
265  internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
266  return *this;
267  }
268 
269  EIGEN_DEVICE_FUNC
270  EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
271  {
272  typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
273  Assign assign(*this, other);
274  internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
275  return *this;
276  }
277 
278 
279  protected:
280  typename XprType::Nested m_xpr;
281  const StartIndices m_indices;
282  const Sizes m_sizes;
283 };
284 
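Like the reshape above, this op is normally obtained through TensorBase::slice(), which takes the start indices and the extents of the block to extract. A usage sketch (values chosen arbitrarily):

#include <unsupported/Eigen/CXX11/Tensor>

void slice_example() {
  Eigen::Tensor<float, 2> input(5, 4);
  input.setRandom();
  Eigen::array<Eigen::Index, 2> offsets{{1, 1}};
  Eigen::array<Eigen::Index, 2> extents{{3, 2}};
  // Extracts the 3x2 block whose top-left corner is at (1, 1).
  Eigen::Tensor<float, 2> block = input.slice(offsets, extents);
}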
285 
286 // FIXME: figure out the exact threshold
287 namespace {
288 template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
289  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
290  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }
291 
292  private:
293  Index threshold_;
294 };
295 
296 // It is very expensive to start the memcpy kernel on GPU: we therefore only
297 // use it for large copies.
298 #ifdef EIGEN_USE_GPU
299 template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
300  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
301  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
302 };
303 #endif
304 
305 // It is very expensive to start the memcpy kernel on GPU: we therefore only
306 // use it for large copies.
307 #ifdef EIGEN_USE_SYCL
308 template <typename Index> struct MemcpyTriggerForSlicing<Index, const Eigen::SyclDevice> {
309  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
310  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
311 };
312 #endif
313 
314 }
315 
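A standalone restatement (hypothetical helper names, not part of Eigen) of the policy these functors encode: the generic CPU trigger switches to memcpy once a contiguous run exceeds 2 * numThreads coefficients, while the GPU and SYCL specializations require more than 4*1024*1024 coefficients before launching a copy kernel is considered worthwhile:

#include <cstddef>

bool cpu_memcpy_worthwhile(std::ptrdiff_t contiguous_values, int num_threads) {
  return contiguous_values > 2 * num_threads;
}
bool gpu_memcpy_worthwhile(std::ptrdiff_t contiguous_values) {
  return contiguous_values > 4 * 1024 * 1024;
}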
316 // Eval as rvalue
317 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
318 struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
319 {
320  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
321  static const int NumDims = internal::array_size<Sizes>::value;
322 
323  enum {
324  // Alignment can't be guaranteed at compile time since it depends on the
325  // slice offsets and sizes.
326  IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
327  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
328  Layout = TensorEvaluator<ArgType, Device>::Layout,
329  CoordAccess = false,
330  RawAccess = false
331  };
332 
333  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
334  : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
335  {
336  for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
337  eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
338  }
339 
340  const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
341  const Sizes& output_dims = op.sizes();
342  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
343  m_inputStrides[0] = 1;
344  for (int i = 1; i < NumDims; ++i) {
345  m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
346  }
347 
348  // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
349  m_outputStrides[0] = 1;
350  for (int i = 1; i < NumDims; ++i) {
351  m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
352  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
353  }
354  } else {
355  m_inputStrides[NumDims-1] = 1;
356  for (int i = NumDims - 2; i >= 0; --i) {
357  m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
358  }
359 
360  // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
361  m_outputStrides[NumDims-1] = 1;
362  for (int i = NumDims - 2; i >= 0; --i) {
363  m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
364  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
365  }
366  }
367  }
368 
369  typedef typename XprType::Index Index;
370  typedef typename XprType::Scalar Scalar;
371  typedef typename XprType::CoeffReturnType CoeffReturnType;
372  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
373  typedef Sizes Dimensions;
374 
375  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
376 
377 
378  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
379  m_impl.evalSubExprsIfNeeded(NULL);
380  if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
381  Index contiguous_values = 1;
382  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
383  for (int i = 0; i < NumDims; ++i) {
384  contiguous_values *= dimensions()[i];
385  if (dimensions()[i] != m_impl.dimensions()[i]) {
386  break;
387  }
388  }
389  } else {
390  for (int i = NumDims-1; i >= 0; --i) {
391  contiguous_values *= dimensions()[i];
392  if (dimensions()[i] != m_impl.dimensions()[i]) {
393  break;
394  }
395  }
396  }
397  // Use memcpy if it's going to be faster than using the regular evaluation.
398  const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
399  if (trigger(contiguous_values)) {
400  Scalar* src = (Scalar*)m_impl.data();
401  for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
402  Index offset = srcCoeff(i);
403  m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
404  }
405  return false;
406  }
407  }
408  return true;
409  }
410 
411  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
412  m_impl.cleanup();
413  }
414 
415  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
416  {
417  return m_impl.coeff(srcCoeff(index));
418  }
419 
420  template<int LoadMode>
421  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
422  {
423  const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
424  EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
425  eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
426 
427  Index inputIndices[] = {0, 0};
428  Index indices[] = {index, index + packetSize - 1};
429  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
430  for (int i = NumDims - 1; i > 0; --i) {
431  const Index idx0 = indices[0] / m_fastOutputStrides[i];
432  const Index idx1 = indices[1] / m_fastOutputStrides[i];
433  inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
434  inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
435  indices[0] -= idx0 * m_outputStrides[i];
436  indices[1] -= idx1 * m_outputStrides[i];
437  }
438  inputIndices[0] += (indices[0] + m_offsets[0]);
439  inputIndices[1] += (indices[1] + m_offsets[0]);
440  } else {
441  for (int i = 0; i < NumDims - 1; ++i) {
442  const Index idx0 = indices[0] / m_fastOutputStrides[i];
443  const Index idx1 = indices[1] / m_fastOutputStrides[i];
444  inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
445  inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
446  indices[0] -= idx0 * m_outputStrides[i];
447  indices[1] -= idx1 * m_outputStrides[i];
448  }
449  inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
450  inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
451  }
452  if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
453  PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
454  return rslt;
455  }
456  else {
457  EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
458  values[0] = m_impl.coeff(inputIndices[0]);
459  values[packetSize-1] = m_impl.coeff(inputIndices[1]);
460  for (int i = 1; i < packetSize-1; ++i) {
461  values[i] = coeff(index+i);
462  }
463  PacketReturnType rslt = internal::pload<PacketReturnType>(values);
464  return rslt;
465  }
466  }
467 
468  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
469  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
470  }
471 
472 
473  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
474  Scalar* result = m_impl.data();
475  if (result) {
476  Index offset = 0;
477  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
478  for (int i = 0; i < NumDims; ++i) {
479  if (m_dimensions[i] != m_impl.dimensions()[i]) {
480  offset += m_offsets[i] * m_inputStrides[i];
481  for (int j = i+1; j < NumDims; ++j) {
482  if (m_dimensions[j] > 1) {
483  return NULL;
484  }
485  offset += m_offsets[j] * m_inputStrides[j];
486  }
487  break;
488  }
489  }
490  } else {
491  for (int i = NumDims - 1; i >= 0; --i) {
492  if (m_dimensions[i] != m_impl.dimensions()[i]) {
493  offset += m_offsets[i] * m_inputStrides[i];
494  for (int j = i-1; j >= 0; --j) {
495  if (m_dimensions[j] > 1) {
496  return NULL;
497  }
498  offset += m_offsets[j] * m_inputStrides[j];
499  }
500  break;
501  }
502  }
503  }
504  return result + offset;
505  }
506  return NULL;
507  }
509  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {
510  return m_impl;
511  }
513  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& startIndices() const {
514  return m_offsets;
515  }
516  protected:
517  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
518  {
519  Index inputIndex = 0;
520  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
521  for (int i = NumDims - 1; i > 0; --i) {
522  const Index idx = index / m_fastOutputStrides[i];
523  inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
524  index -= idx * m_outputStrides[i];
525  }
526  inputIndex += (index + m_offsets[0]);
527  } else {
528  for (int i = 0; i < NumDims - 1; ++i) {
529  const Index idx = index / m_fastOutputStrides[i];
530  inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
531  index -= idx * m_outputStrides[i];
532  }
533  inputIndex += (index + m_offsets[NumDims-1]);
534  }
535  return inputIndex;
536  }
537 
538  array<Index, NumDims> m_outputStrides;
539  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
540  array<Index, NumDims> m_inputStrides;
541  TensorEvaluator<ArgType, Device> m_impl;
542  const Device& m_device;
543  Dimensions m_dimensions;
544  const StartIndices m_offsets;
545 };
546 
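To make the index arithmetic in srcCoeff() concrete, here is a standalone sketch (not Eigen code) that replays the ColMajor branch for a 5x4 input sliced with offsets {1, 1} and extents {3, 2}, and checks two flat indices by hand:

#include <cassert>

int main() {
  const int in_dims[2]     = {5, 4};            // input dimensions
  const int offs[2]        = {1, 1};            // slice start indices
  const int out_dims[2]    = {3, 2};            // slice sizes
  const int in_strides[2]  = {1, in_dims[0]};   // {1, 5}
  const int out_strides[2] = {1, out_dims[0]};  // {1, 3}

  // Flat output index -> flat input index, mirroring srcCoeff() for ColMajor.
  auto src_coeff = [&](int index) {
    int input_index = 0;
    for (int i = 1; i > 0; --i) {
      const int idx = index / out_strides[i];
      input_index += (idx + offs[i]) * in_strides[i];
      index -= idx * out_strides[i];
    }
    return input_index + (index + offs[0]);
  };

  assert(src_coeff(0) == 6);   // output (0,0) -> input (1,1) = 1 + 1*5
  assert(src_coeff(5) == 13);  // output (2,1) -> input (3,2) = 3 + 2*5
  return 0;
}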
547 
548 // Eval as lvalue
549 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
550 struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
551  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
552 {
553  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
554  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
555  static const int NumDims = internal::array_size<Sizes>::value;
556 
557  enum {
558  IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
559  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
560  Layout = TensorEvaluator<ArgType, Device>::Layout,
561  CoordAccess = false,
562  RawAccess = false
563  };
564 
565  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
566  : Base(op, device)
567  { }
568 
569  typedef typename XprType::Index Index;
570  typedef typename XprType::Scalar Scalar;
571  typedef typename XprType::CoeffReturnType CoeffReturnType;
572  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
573  typedef Sizes Dimensions;
574 
575  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
576  {
577  return this->m_impl.coeffRef(this->srcCoeff(index));
578  }
579 
580  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
581  void writePacket(Index index, const PacketReturnType& x)
582  {
583  const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
584  Index inputIndices[] = {0, 0};
585  Index indices[] = {index, index + packetSize - 1};
586  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
587  for (int i = NumDims - 1; i > 0; --i) {
588  const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
589  const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
590  inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
591  inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
592  indices[0] -= idx0 * this->m_outputStrides[i];
593  indices[1] -= idx1 * this->m_outputStrides[i];
594  }
595  inputIndices[0] += (indices[0] + this->m_offsets[0]);
596  inputIndices[1] += (indices[1] + this->m_offsets[0]);
597  } else {
598  for (int i = 0; i < NumDims - 1; ++i) {
599  const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
600  const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
601  inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
602  inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
603  indices[0] -= idx0 * this->m_outputStrides[i];
604  indices[1] -= idx1 * this->m_outputStrides[i];
605  }
606  inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
607  inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
608  }
609  if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
610  this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
611  }
612  else {
613  EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
614  internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
615  this->m_impl.coeffRef(inputIndices[0]) = values[0];
616  this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
617  for (int i = 1; i < packetSize-1; ++i) {
618  this->coeffRef(index+i) = values[i];
619  }
620  }
621  }
622 };
623 
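The lvalue evaluator above is what makes assignment into a slice work: coeffRef() and writePacket() translate output coordinates back into the parent tensor's storage. A short sketch:

#include <unsupported/Eigen/CXX11/Tensor>

void slice_assign_example() {
  Eigen::Tensor<float, 2> canvas(5, 4);
  canvas.setZero();
  Eigen::Tensor<float, 2> patch(3, 2);
  patch.setConstant(1.0f);
  Eigen::array<Eigen::Index, 2> offsets{{1, 1}};
  Eigen::array<Eigen::Index, 2> extents{{3, 2}};
  // Overwrites the 3x2 block of canvas starting at (1, 1).
  canvas.slice(offsets, extents) = patch;
}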
624 
625 
626 namespace internal {
627 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
628 struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
629 {
630  typedef typename XprType::Scalar Scalar;
631  typedef traits<XprType> XprTraits;
632  typedef typename XprTraits::StorageKind StorageKind;
633  typedef typename XprTraits::Index Index;
634  typedef typename XprType::Nested Nested;
635  typedef typename remove_reference<Nested>::type _Nested;
636  static const int NumDimensions = array_size<StartIndices>::value;
637  static const int Layout = XprTraits::Layout;
638  typedef typename XprTraits::PointerType PointerType;
639 };
640 
641 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
642 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
643 {
644  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
645 };
646 
647 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
648 struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
649 {
650  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
651 };
652 
653 } // end namespace internal
654 
655 
656 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
657 class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
658 {
659  public:
660  typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
661  typedef typename XprType::CoeffReturnType CoeffReturnType;
662  typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
663  typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
664  typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;
665 
666  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
667  const XprType& expr, const StartIndices& startIndices,
668  const StopIndices& stopIndices, const Strides& strides)
669  : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
670  m_strides(strides) {}
671 
672  EIGEN_DEVICE_FUNC
673  const StartIndices& startIndices() const { return m_startIndices; }
674  EIGEN_DEVICE_FUNC
675  const StopIndices& stopIndices() const { return m_stopIndices; }
676  EIGEN_DEVICE_FUNC
677  const Strides& strides() const { return m_strides; }
678 
679  EIGEN_DEVICE_FUNC
680  const typename internal::remove_all<typename XprType::Nested>::type&
681  expression() const { return m_xpr; }
682 
683  EIGEN_DEVICE_FUNC
684  EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
685  {
686  typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
687  Assign assign(*this, other);
688  internal::TensorExecutor<const Assign, DefaultDevice>::run(
689  assign, DefaultDevice());
690  return *this;
691  }
692 
693  template<typename OtherDerived>
694  EIGEN_DEVICE_FUNC
695  EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
696  {
697  typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
698  Assign assign(*this, other);
699  internal::TensorExecutor<const Assign, DefaultDevice>::run(
700  assign, DefaultDevice());
701  return *this;
702  }
703 
704  protected:
705  typename XprType::Nested m_xpr;
706  const StartIndices m_startIndices;
707  const StopIndices m_stopIndices;
708  const Strides m_strides;
709 };
710 
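This op is normally created through TensorBase::stridedSlice(), which takes start indices, stop indices and a per-dimension stride (a negative stride walks its dimension backwards). A usage sketch:

#include <unsupported/Eigen/CXX11/Tensor>

void strided_slice_example() {
  Eigen::Tensor<float, 2> input(4, 6);
  input.setRandom();
  Eigen::array<Eigen::Index, 2> starts{{0, 0}};
  Eigen::array<Eigen::Index, 2> stops{{4, 6}};
  Eigen::array<Eigen::Index, 2> strides{{1, 2}};
  // Every row, every second column: a 4x3 result.
  Eigen::Tensor<float, 2> every_other = input.stridedSlice(starts, stops, strides);
}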
711 // Eval as rvalue
712 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
713 struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
714 {
715  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
716  static const int NumDims = internal::array_size<Strides>::value;
717  typedef typename XprType::Index Index;
718  typedef typename XprType::Scalar Scalar;
719  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
720  typedef typename XprType::CoeffReturnType CoeffReturnType;
721  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
722  typedef Strides Dimensions;
723 
724  enum {
725  // Alignment can't be guaranteed at compile time since it depends on the
726  // slice offsets and sizes.
727  IsAligned = false,
728  PacketAccess = false,
729  BlockAccess = false,
730  Layout = TensorEvaluator<ArgType, Device>::Layout,
731  RawAccess = false
732  };
733 
734  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
735  : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()), m_exprStartIndices(op.startIndices()), m_exprStopIndices(op.stopIndices())
736  {
737  // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
738  DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
739  for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
740  eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
741  if(m_strides[i]>0){
742  startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
743  stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
744  }else{
745  /* implies m_strides[i]<0 by assert */
746  startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
747  stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
748  }
749  m_startIndices[i] = startIndicesClamped[i];
750  }
751 
752  const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
753 
754  // check for degenerate intervals and compute output tensor shape
755  bool degenerate = false;
756  for(int i = 0; i < NumDims; i++){
757  Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
758  if(interval == 0 || ((interval<0) != (m_strides[i]<0))){
759  m_dimensions[i] = 0;
760  degenerate = true;
761  }else{
762  m_dimensions[i] = interval / m_strides[i]
763  + (interval % m_strides[i] != 0 ? 1 : 0);
764  eigen_assert(m_dimensions[i] >= 0);
765  }
766  }
767  Strides output_dims = m_dimensions;
768 
769  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
770  m_inputStrides[0] = m_strides[0];
771  m_offsets[0] = startIndicesClamped[0];
772  Index previousDimProduct = 1;
773  for (int i = 1; i < NumDims; ++i) {
774  previousDimProduct *= input_dims[i-1];
775  m_inputStrides[i] = previousDimProduct * m_strides[i];
776  m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
777  }
778 
779  // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
780  m_outputStrides[0] = 1;
781  for (int i = 1; i < NumDims; ++i) {
782  m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
783  // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
784  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
785  }
786  } else {
787  m_inputStrides[NumDims-1] = m_strides[NumDims-1];
788  m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
789  Index previousDimProduct = 1;
790  for (int i = NumDims - 2; i >= 0; --i) {
791  previousDimProduct *= input_dims[i+1];
792  m_inputStrides[i] = previousDimProduct * m_strides[i];
793  m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
794  }
795 
796  m_outputStrides[NumDims-1] = 1;
797  for (int i = NumDims - 2; i >= 0; --i) {
798  m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
799  // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
800  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
801  }
802  }
803  m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
804  device.lastLevelCacheSize() /
805  sizeof(Scalar));
806  }
807 
808  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
809 
810 
811  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
812  m_impl.evalSubExprsIfNeeded(NULL);
813  return true;
814  }
815 
816  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
817  m_impl.cleanup();
818  }
819 
820  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
821  {
822  return m_impl.coeff(srcCoeff(index));
823  }
824 
825  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
826  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
827  }
828 
829  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
830  return NULL;
831  }
832 
833  // used by SYCL
834  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStartIndices() const { return m_exprStartIndices; }
835  // used by SYCL
836  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StopIndices& exprStopIndices() const { return m_exprStopIndices; }
837  // used by SYCL
838  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Strides& strides() const { return m_strides; }
840  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
841 
842  protected:
843  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
844  {
845  Index inputIndex = 0;
846  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
847  for (int i = NumDims - 1; i >= 0; --i) {
848  const Index idx = index / m_fastOutputStrides[i];
849  inputIndex += idx * m_inputStrides[i] + m_offsets[i];
850  index -= idx * m_outputStrides[i];
851  }
852  } else {
853  for (int i = 0; i < NumDims; ++i) {
854  const Index idx = index / m_fastOutputStrides[i];
855  inputIndex += idx * m_inputStrides[i] + m_offsets[i];
856  index -= idx * m_outputStrides[i];
857  }
858  }
859  return inputIndex;
860  }
861 
862  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
863 #ifndef __SYCL_DEVICE_ONLY__
864  return numext::maxi(min, numext::mini(max,value));
865 #else
866  return cl::sycl::clamp(value, min, max);
867 #endif
868  }
869 
870  array<Index, NumDims> m_outputStrides;
871  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
872  array<Index, NumDims> m_inputStrides;
873  TensorEvaluator<ArgType, Device> m_impl;
874  const Device& m_device;
875  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
876  DSizes<Index, NumDims> m_dimensions;
877  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
878  const Strides m_strides;
879  std::size_t m_block_total_size_max;
880  // used by SYCL
881  const StartIndices m_exprStartIndices;
882  // used by SYCL
883  const StopIndices m_exprStopIndices;
884 };
885 
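The per-dimension output size computed in the evaluator constructor above is ceil(|stop - start| / |stride|) after clamping, with an empty interval or a stride whose sign disagrees with the interval collapsing that dimension to zero. A small standalone restatement (not Eigen code) with worked values:

// Size of one strided-slice dimension, as derived in the constructor above.
inline long strided_extent(long start, long stop, long stride) {
  const long interval = stop - start;
  if (interval == 0 || ((interval < 0) != (stride < 0))) return 0;  // degenerate
  return interval / stride + (interval % stride != 0 ? 1 : 0);
}
// strided_extent(1, 7,  2) == 3   -> indices 1, 3, 5
// strided_extent(5, 0, -2) == 3   -> indices 5, 3, 1
// strided_extent(2, 2,  1) == 0   -> empty interval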
886 // Eval as lvalue
887 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
888 struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
889  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
890 {
891  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
892  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
893  static const int NumDims = internal::array_size<Strides>::value;
894 
895  enum {
896  IsAligned = false,
897  PacketAccess = false,
898  BlockAccess = false,
899  Layout = TensorEvaluator<ArgType, Device>::Layout,
900  CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
901  RawAccess = false
902  };
903 
904  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
905  : Base(op, device)
906  { }
907 
908  typedef typename XprType::Index Index;
909  typedef typename XprType::Scalar Scalar;
910  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
911  typedef typename XprType::CoeffReturnType CoeffReturnType;
912  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
913  typedef Strides Dimensions;
914 
915  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
916  {
917  return this->m_impl.coeffRef(this->srcCoeff(index));
918  }
919 };
920 
921 
922 } // end namespace Eigen
923 
924 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H