#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#include "./InternalHeaderCheck.h"
25template<
typename NewDimensions,
typename XprType>
26struct traits<TensorReshapingOp<NewDimensions, XprType> > :
public traits<XprType>
28 typedef typename XprType::Scalar Scalar;
29 typedef traits<XprType> XprTraits;
30 typedef typename XprTraits::StorageKind StorageKind;
31 typedef typename XprTraits::Index
Index;
32 typedef typename XprType::Nested Nested;
33 typedef typename remove_reference<Nested>::type _Nested;
34 static const int NumDimensions = array_size<NewDimensions>::value;
35 static const int Layout = XprTraits::Layout;
36 typedef typename XprTraits::PointerType PointerType;
39template<
typename NewDimensions,
typename XprType>
40struct eval<TensorReshapingOp<NewDimensions, XprType>,
Eigen::Dense>
42 typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type;
45template<
typename NewDimensions,
typename XprType>
46struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
48 typedef TensorReshapingOp<NewDimensions, XprType> type;
55template<
typename NewDimensions,
typename XprType>
56class TensorReshapingOp :
public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
59 typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>,
WriteAccessors> Base;
60 typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
61 typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
62 typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
63 typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
64 typedef typename Eigen::internal::traits<TensorReshapingOp>::Index
Index;
66 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(
const XprType& expr,
const NewDimensions& dims)
67 : m_xpr(expr), m_dims(dims) {}
70 const NewDimensions& dimensions()
const {
return m_dims; }
73 const typename internal::remove_all<typename XprType::Nested>::type&
74 expression()
const {
return m_xpr; }
76 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)
79 typename XprType::Nested m_xpr;
80 const NewDimensions m_dims;
85template<
typename NewDimensions,
typename ArgType,
typename Device>
86struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
88 typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
89 typedef NewDimensions Dimensions;
91 typedef typename XprType::Index Index;
92 typedef typename XprType::Scalar Scalar;
93 typedef typename XprType::CoeffReturnType CoeffReturnType;
94 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
95 typedef StorageMemory<CoeffReturnType, Device> Storage;
96 typedef typename Storage::Type EvaluatorPointerType;
97 typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
99 static const int NumOutputDims = internal::array_size<Dimensions>::value;
100 static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
111 static const ReshapingKind kind =
112#if defined(EIGEN_HAS_INDEX_LIST)
113 (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(0, 1)) ? OneByN
114 : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(1, 1)) ? NByOne
122 IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
123 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
127 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess &&
128 NumInputDims > 0 && NumOutputDims > 0,
129 PreferBlockAccess =
false,
130 Layout = TensorEvaluator<ArgType, Device>::Layout,
132 RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
135 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
138 typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
139 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
142 typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
147 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
148 : m_impl(op.expression(), device), m_dimensions(op.dimensions())
152 eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
155 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
157#ifdef EIGEN_USE_THREADS
158 template <
typename EvalSubExprsCallback>
159 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
160 EvaluatorPointerType data, EvalSubExprsCallback done) {
161 m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
165 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
166 return m_impl.evalSubExprsIfNeeded(data);
168 EIGEN_STRONG_INLINE
void cleanup() {
172 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
174 return m_impl.coeff(index);
177 template<
int LoadMode>
178 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const
180 return m_impl.template packet<LoadMode>(index);
183 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
184 return m_impl.costPerCoeff(vectorized);
187 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
188 internal::TensorBlockResourceRequirements getResourceRequirements()
const {
189 return internal::TensorBlockResourceRequirements::any();
194 struct BlockIteratorState {
201 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
202 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
203 bool =
false)
const {
204 eigen_assert(m_impl.data() != NULL);
205 eigen_assert((kind == Runtime) ||
206 (kind == OneByN && desc.dimensions()[0] == 1) ||
207 (kind == NByOne && desc.dimensions()[1] == 1));
209 if (kind == OneByN || kind == NByOne) {
212 return TensorBlock(internal::TensorBlockKind::kView,
213 m_impl.data() + desc.offset(), desc.dimensions());
217 return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
222 EIGEN_DEVICE_FUNC
typename Storage::Type data()
const {
223 return constCast(m_impl.data());
226 EIGEN_DEVICE_FUNC
const TensorEvaluator<ArgType, Device>& impl()
const {
return m_impl; }
228 #ifdef EIGEN_USE_SYCL
230 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
235 TensorEvaluator<ArgType, Device> m_impl;
236 NewDimensions m_dimensions;
241template<
typename NewDimensions,
typename ArgType,
typename Device>
242 struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
243 :
public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
246 typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
247 typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
248 typedef NewDimensions Dimensions;
251 IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
252 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
253 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
254 PreferBlockAccess =
false,
255 Layout = TensorEvaluator<ArgType, Device>::Layout,
257 RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
260 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
264 typedef typename XprType::Index
Index;
265 typedef typename XprType::Scalar Scalar;
266 typedef typename XprType::CoeffReturnType CoeffReturnType;
267 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
270 typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
274 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
276 return this->m_impl.coeffRef(index);
279 template <
int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
280 void writePacket(Index index,
const PacketReturnType& x)
282 this->m_impl.template writePacket<StoreMode>(index, x);
285 template <
typename TensorBlock>
286 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writeBlock(
287 const TensorBlockDesc& desc,
const TensorBlock& block) {
288 assert(this->m_impl.data() != NULL);
290 typedef typename TensorBlock::XprType TensorBlockExpr;
291 typedef internal::TensorBlockAssignment<
292 Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr,
Index>
295 TensorBlockAssign::Run(
296 TensorBlockAssign::target(desc.dimensions(),
297 internal::strides<Layout>(this->dimensions()),
298 this->m_impl.data(), desc.offset()),
312template<
typename StartIndices,
typename Sizes,
typename XprType>
313struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > :
public traits<XprType>
315 typedef typename XprType::Scalar Scalar;
316 typedef traits<XprType> XprTraits;
317 typedef typename XprTraits::StorageKind StorageKind;
318 typedef typename XprTraits::Index
Index;
319 typedef typename XprType::Nested Nested;
320 typedef typename remove_reference<Nested>::type _Nested;
321 static const int NumDimensions = array_size<StartIndices>::value;
322 static const int Layout = XprTraits::Layout;
323 typedef typename XprTraits::PointerType PointerType;
326template<
typename StartIndices,
typename Sizes,
typename XprType>
327struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>,
Eigen::Dense>
329 typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type;
332template<
typename StartIndices,
typename Sizes,
typename XprType>
333struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
335 typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
342template<
typename StartIndices,
typename Sizes,
typename XprType>
343class TensorSlicingOp :
public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
346 typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
347 typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
348 typedef typename XprType::CoeffReturnType CoeffReturnType;
349 typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
350 typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
351 typedef typename Eigen::internal::traits<TensorSlicingOp>::Index
Index;
353 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(
const XprType& expr,
const StartIndices& indices,
const Sizes& sizes)
354 : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
357 const StartIndices& startIndices()
const {
return m_indices; }
359 const Sizes& sizes()
const {
return m_sizes; }
362 const typename internal::remove_all<typename XprType::Nested>::type&
363 expression()
const {
return m_xpr; }
365 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)
368 typename XprType::Nested m_xpr;
369 const StartIndices m_indices;
376template <
typename Index,
typename Device,
bool BlockAccess>
struct MemcpyTriggerForSlicing {
377 EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(
const Device& device) : threshold_(2 * device.numThreads()) { }
378 EIGEN_DEVICE_FUNC
bool operator ()(
Index total,
Index contiguous)
const {
379 const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
380 return !prefer_block_evaluation && contiguous > threshold_;
// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif
// It is very expensive to start the memcpy kernel on SYCL devices as well:
// only use it for large copies.
#ifdef EIGEN_USE_SYCL
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif
408template<
typename StartIndices,
typename Sizes,
typename ArgType,
typename Device>
409struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
411 typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
412 static const int NumDims = internal::array_size<Sizes>::value;
414 typedef typename XprType::Index
Index;
415 typedef typename XprType::Scalar Scalar;
416 typedef typename XprType::CoeffReturnType CoeffReturnType;
417 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
418 typedef Sizes Dimensions;
419 typedef StorageMemory<CoeffReturnType, Device> Storage;
420 typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
421 typedef typename Storage::Type EvaluatorPointerType;
427 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
428 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
430 !internal::is_same<typename internal::remove_const<Scalar>::type,
bool>::value,
431 PreferBlockAccess =
true,
432 Layout = TensorEvaluator<ArgType, Device>::Layout,
437 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
440 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
441 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
444 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
448 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
449 : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
451 m_is_identity =
true;
452 for (
int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
453 eigen_assert(m_impl.dimensions()[i] >=
454 op.sizes()[i] + op.startIndices()[i]);
455 if (m_impl.dimensions()[i] != op.sizes()[i] ||
456 op.startIndices()[i] != 0) {
457 m_is_identity =
false;
462 if (NumDims == 0)
return;
464 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
465 const Sizes& output_dims = op.sizes();
466 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
467 m_inputStrides[0] = 1;
468 for (
int i = 1; i < NumDims; ++i) {
469 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
473 m_outputStrides[0] = 1;
474 for (
int i = 1; i < NumDims; ++i) {
475 m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
476 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
479 m_inputStrides[NumDims-1] = 1;
480 for (
int i = NumDims - 2; i >= 0; --i) {
481 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
485 m_outputStrides[NumDims-1] = 1;
486 for (
int i = NumDims - 2; i >= 0; --i) {
487 m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
488 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
493 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
495 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
496 m_impl.evalSubExprsIfNeeded(NULL);
497 if (!NumTraits<
typename internal::remove_const<Scalar>::type>::RequireInitialization
498 && data && m_impl.data()) {
499 Index contiguous_values = 1;
500 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
501 for (
int i = 0; i < NumDims; ++i) {
502 contiguous_values *= dimensions()[i];
503 if (dimensions()[i] != m_impl.dimensions()[i]) {
508 for (
int i = NumDims-1; i >= 0; --i) {
509 contiguous_values *= dimensions()[i];
510 if (dimensions()[i] != m_impl.dimensions()[i]) {
516 const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
517 if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
518 EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
519 for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
520 Index offset = srcCoeff(i);
521 m_device.memcpy((
void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values *
sizeof(Scalar));
529#ifdef EIGEN_USE_THREADS
530 template <
typename EvalSubExprsCallback>
531 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
532 EvaluatorPointerType , EvalSubExprsCallback done) {
533 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
537 EIGEN_STRONG_INLINE
void cleanup() {
541 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
544 return m_impl.coeff(index);
546 return m_impl.coeff(srcCoeff(index));
550 template<
int LoadMode>
551 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const
553 const int packetSize = PacketType<CoeffReturnType, Device>::size;
554 EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
555 eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
558 return m_impl.template packet<LoadMode>(index);
561 Index inputIndices[] = {0, 0};
562 Index indices[] = {index, index + packetSize - 1};
563 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
565 for (
int i = NumDims - 1; i > 0; --i) {
566 const Index idx0 = indices[0] / m_fastOutputStrides[i];
567 const Index idx1 = indices[1] / m_fastOutputStrides[i];
568 inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
569 inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
570 indices[0] -= idx0 * m_outputStrides[i];
571 indices[1] -= idx1 * m_outputStrides[i];
573 inputIndices[0] += (indices[0] + m_offsets[0]);
574 inputIndices[1] += (indices[1] + m_offsets[0]);
577 for (
int i = 0; i < NumDims - 1; ++i) {
578 const Index idx0 = indices[0] / m_fastOutputStrides[i];
579 const Index idx1 = indices[1] / m_fastOutputStrides[i];
580 inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
581 inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
582 indices[0] -= idx0 * m_outputStrides[i];
583 indices[1] -= idx1 * m_outputStrides[i];
585 inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
586 inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
588 if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
589 PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
593 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[packetSize];
594 values[0] = m_impl.coeff(inputIndices[0]);
595 values[packetSize-1] = m_impl.coeff(inputIndices[1]);
597 for (
int i = 1; i < packetSize-1; ++i) {
598 values[i] = coeff(index+i);
600 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
605 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
606 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
609 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
610 internal::TensorBlockResourceRequirements getResourceRequirements()
const {
611 const size_t target_size = m_device.lastLevelCacheSize();
612 return internal::TensorBlockResourceRequirements::merge(
613 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
614 m_impl.getResourceRequirements());
617 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
618 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
619 bool =
false)
const {
620 TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
621 TensorBlock block = m_impl.block(arg_desc, scratch);
622 if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
626 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename Storage::Type data()
const {
627 typename Storage::Type result = constCast(m_impl.data());
630 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
631 for (
int i = 0; i < NumDims; ++i) {
632 if (m_dimensions[i] != m_impl.dimensions()[i]) {
633 offset += m_offsets[i] * m_inputStrides[i];
634 for (
int j = i+1; j < NumDims; ++j) {
635 if (m_dimensions[j] > 1) {
638 offset += m_offsets[j] * m_inputStrides[j];
644 for (
int i = NumDims - 1; i >= 0; --i) {
645 if (m_dimensions[i] != m_impl.dimensions()[i]) {
646 offset += m_offsets[i] * m_inputStrides[i];
647 for (
int j = i-1; j >= 0; --j) {
648 if (m_dimensions[j] > 1) {
651 offset += m_offsets[j] * m_inputStrides[j];
657 return result + offset;
663 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
669 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Index srcCoeff(Index index)
const
671 Index inputIndex = 0;
672 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
674 for (
int i = NumDims - 1; i > 0; --i) {
675 const Index idx = index / m_fastOutputStrides[i];
676 inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
677 index -= idx * m_outputStrides[i];
679 inputIndex += (index + m_offsets[0]);
682 for (
int i = 0; i < NumDims - 1; ++i) {
683 const Index idx = index / m_fastOutputStrides[i];
684 inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
685 index -= idx * m_outputStrides[i];
687 inputIndex += (index + m_offsets[NumDims-1]);
692 array<Index, NumDims> m_outputStrides;
693 array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
694 array<Index, NumDims> m_inputStrides;
695 TensorEvaluator<ArgType, Device> m_impl;
696 const Device EIGEN_DEVICE_REF m_device;
697 Dimensions m_dimensions;
699 const StartIndices m_offsets;
704template<
typename StartIndices,
typename Sizes,
typename ArgType,
typename Device>
705struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
706 :
public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
708 typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
709 typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
710 static const int NumDims = internal::array_size<Sizes>::value;
712 typedef typename XprType::Index
Index;
713 typedef typename XprType::Scalar Scalar;
714 typedef typename XprType::CoeffReturnType CoeffReturnType;
715 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
716 typedef Sizes Dimensions;
720 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
721 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
722 PreferBlockAccess =
true,
723 Layout = TensorEvaluator<ArgType, Device>::Layout,
725 RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
728 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
731 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
732 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
735 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
739 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
741 if (this->m_is_identity) {
742 return this->m_impl.coeffRef(index);
744 return this->m_impl.coeffRef(this->srcCoeff(index));
748 template <
int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
749 void writePacket(Index index,
const PacketReturnType& x)
751 if (this->m_is_identity) {
752 this->m_impl.template writePacket<StoreMode>(index, x);
756 const int packetSize = PacketType<CoeffReturnType, Device>::size;
757 Index inputIndices[] = {0, 0};
758 Index indices[] = {index, index + packetSize - 1};
759 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
761 for (
int i = NumDims - 1; i > 0; --i) {
762 const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
763 const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
764 inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
765 inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
766 indices[0] -= idx0 * this->m_outputStrides[i];
767 indices[1] -= idx1 * this->m_outputStrides[i];
769 inputIndices[0] += (indices[0] + this->m_offsets[0]);
770 inputIndices[1] += (indices[1] + this->m_offsets[0]);
773 for (
int i = 0; i < NumDims - 1; ++i) {
774 const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
775 const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
776 inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
777 inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
778 indices[0] -= idx0 * this->m_outputStrides[i];
779 indices[1] -= idx1 * this->m_outputStrides[i];
781 inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
782 inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
784 if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
785 this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
788 EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
789 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
790 this->m_impl.coeffRef(inputIndices[0]) = values[0];
791 this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
793 for (
int i = 1; i < packetSize-1; ++i) {
794 this->coeffRef(index+i) = values[i];
799 template<
typename TensorBlock>
800 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writeBlock(
801 const TensorBlockDesc& desc,
const TensorBlock& block) {
802 TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
803 this->m_impl.writeBlock(arg_desc, block);
808template<
typename StartIndices,
typename StopIndices,
typename Str
ides,
typename XprType>
809struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > :
public traits<XprType>
811 typedef typename XprType::Scalar Scalar;
812 typedef traits<XprType> XprTraits;
813 typedef typename XprTraits::StorageKind StorageKind;
814 typedef typename XprTraits::Index
Index;
815 typedef typename XprType::Nested Nested;
816 typedef typename remove_reference<Nested>::type _Nested;
817 static const int NumDimensions = array_size<StartIndices>::value;
818 static const int Layout = XprTraits::Layout;
819 typedef typename XprTraits::PointerType PointerType;
822template<
typename StartIndices,
typename StopIndices,
typename Str
ides,
typename XprType>
823struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>,
Eigen::Dense>
825 typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type;
828template<
typename StartIndices,
typename StopIndices,
typename Str
ides,
typename XprType>
829struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
831 typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
837template<
typename StartIndices,
typename StopIndices,
typename Str
ides,
typename XprType>
838class TensorStridingSlicingOp :
public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
841 typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
842 typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
843 typedef typename XprType::CoeffReturnType CoeffReturnType;
844 typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
845 typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
846 typedef typename internal::traits<TensorStridingSlicingOp>::Index
Index;
848 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
849 const XprType& expr,
const StartIndices& startIndices,
850 const StopIndices& stopIndices,
const Strides& strides)
851 : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
852 m_strides(strides) {}
855 const StartIndices& startIndices()
const {
return m_startIndices; }
857 const StartIndices& stopIndices()
const {
return m_stopIndices; }
859 const StartIndices& strides()
const {
return m_strides; }
862 const typename internal::remove_all<typename XprType::Nested>::type&
863 expression()
const {
return m_xpr; }
865 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)
868 typename XprType::Nested m_xpr;
869 const StartIndices m_startIndices;
870 const StopIndices m_stopIndices;
871 const Strides m_strides;
875template<
typename StartIndices,
typename StopIndices,
typename Str
ides,
typename ArgType,
typename Device>
876struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
878 typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
879 static const int NumDims = internal::array_size<Strides>::value;
880 typedef typename XprType::Index
Index;
881 typedef typename XprType::Scalar Scalar;
882 typedef typename XprType::CoeffReturnType CoeffReturnType;
883 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
884 typedef StorageMemory<CoeffReturnType, Device> Storage;
885 typedef typename Storage::Type EvaluatorPointerType;
886 typedef Strides Dimensions;
892 PacketAccess =
false,
894 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
895 Layout = TensorEvaluator<ArgType, Device>::Layout,
900 typedef internal::TensorBlockNotImplemented TensorBlock;
903 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
904 : m_impl(op.expression(), device),
906 m_strides(op.strides())
909 DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
910 for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
911 eigen_assert(m_strides[i] != 0 &&
"0 stride is invalid");
912 if (m_strides[i] > 0) {
913 startIndicesClamped[i] =
914 clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
915 stopIndicesClamped[i] =
916 clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
919 startIndicesClamped[i] =
920 clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
921 stopIndicesClamped[i] =
922 clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
924 m_startIndices[i] = startIndicesClamped[i];
927 typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
928 const InputDimensions& input_dims = m_impl.dimensions();
931 m_is_identity =
true;
932 for (
int i = 0; i < NumDims; i++) {
933 Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
934 if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
938 (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
939 eigen_assert(m_dimensions[i] >= 0);
941 if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
942 m_is_identity =
false;
946 Strides output_dims = m_dimensions;
948 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
949 m_inputStrides[0] = m_strides[0];
950 m_offsets[0] = startIndicesClamped[0];
951 Index previousDimProduct = 1;
952 for (
int i = 1; i < NumDims; ++i) {
953 previousDimProduct *= input_dims[i-1];
954 m_inputStrides[i] = previousDimProduct * m_strides[i];
955 m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
959 m_outputStrides[0] = 1;
960 for (
int i = 1; i < NumDims; ++i) {
961 m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
962 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
965 m_inputStrides[NumDims-1] = m_strides[NumDims-1];
966 m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
967 Index previousDimProduct = 1;
968 for (
int i = NumDims - 2; i >= 0; --i) {
969 previousDimProduct *= input_dims[i+1];
970 m_inputStrides[i] = previousDimProduct * m_strides[i];
971 m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
974 m_outputStrides[NumDims-1] = 1;
975 for (
int i = NumDims - 2; i >= 0; --i) {
976 m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
977 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
982 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
985 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType) {
986 m_impl.evalSubExprsIfNeeded(NULL);
990 EIGEN_STRONG_INLINE
void cleanup() {
994 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
997 return m_impl.coeff(index);
999 return m_impl.coeff(srcCoeff(index));
1003 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
1004 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
1007 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename Storage::Type data()
const {
1010#ifdef EIGEN_USE_SYCL
1012 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
1017 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Index srcCoeff(Index index)
const
1019 Index inputIndex = 0;
1020 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
1022 for (
int i = NumDims - 1; i >= 0; --i) {
1023 const Index idx = index / m_fastOutputStrides[i];
1024 inputIndex += idx * m_inputStrides[i] + m_offsets[i];
1025 index -= idx * m_outputStrides[i];
1029 for (
int i = 0; i < NumDims; ++i) {
1030 const Index idx = index / m_fastOutputStrides[i];
1031 inputIndex += idx * m_inputStrides[i] + m_offsets[i];
1032 index -= idx * m_outputStrides[i];
1038 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Index clamp(Index value, Index min, Index max) {
1039#ifndef SYCL_DEVICE_ONLY
1040 return numext::maxi(min, numext::mini(max,value));
1042 return cl::sycl::clamp(value, min, max);
1046 array<Index, NumDims> m_outputStrides;
1047 array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
1048 array<Index, NumDims> m_inputStrides;
1050 TensorEvaluator<ArgType, Device> m_impl;
1051 const Device EIGEN_DEVICE_REF m_device;
1052 DSizes<Index, NumDims> m_startIndices;
1053 DSizes<Index, NumDims> m_dimensions;
1054 DSizes<Index, NumDims> m_offsets;
1055 const Strides m_strides;
1059template<
typename StartIndices,
typename StopIndices,
typename Str
ides,
typename ArgType,
typename Device>
1060struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
1061 :
public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
1063 typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
1064 typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
1065 static const int NumDims = internal::array_size<Strides>::value;
1069 PacketAccess =
false,
1070 BlockAccess =
false,
1071 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
1072 Layout = TensorEvaluator<ArgType, Device>::Layout,
1073 CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
1078 typedef internal::TensorBlockNotImplemented TensorBlock;
1081 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
1085 typedef typename XprType::Index
Index;
1086 typedef typename XprType::Scalar Scalar;
1087 typedef typename XprType::CoeffReturnType CoeffReturnType;
1088 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
1089 typedef Strides Dimensions;
1091 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
1093 if (this->m_is_identity) {
1094 return this->m_impl.coeffRef(index);
1096 return this->m_impl.coeffRef(this->srcCoeff(index));
// Eigen: namespace containing all symbols from the Eigen library.
// Index defaults to EIGEN_DEFAULT_DENSE_INDEX_TYPE (std::ptrdiff_t).