#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorStriding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor striding class.
  */
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType> EIGEN_DEVICE_REF type;
};
template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};

}  // end namespace internal
template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
        : m_xpr(expr), m_dims(dims) {}
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }
    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_dims;
};
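// A minimal usage sketch (illustrative, not part of this header):
// TensorStridingOp is normally created through TensorBase::stride(). Every
// strides[i]-th coefficient is kept along dimension i, so a 4x6 tensor
// strided by {2, 3} yields a 2x2 view.
//
//   Eigen::Tensor<float, 2> input(4, 6);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 3}};
//   Eigen::Tensor<float, 2> output = input.stride(strides);
//   // output(i, j) == input(2 * i, 3 * j)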
// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorBlockNotImplemented TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      // Striding divides each dimension, rounding up for partial strides.
      m_dimensions[i] = Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_inputStrides[i-1] *= op.strides()[i-1];
      }
      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_inputStrides[i+1] *= op.strides()[i+1];
      }
      m_inputStrides[0] *= op.strides()[0];
    }
  }
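  // Worked example of the bookkeeping above (illustrative): a ColMajor input
  // of dimensions {10, 20} strided by {3, 5} yields output dimensions
  // {ceil(10/3), ceil(20/5)} = {4, 4} and m_outputStrides == {1, 4}. The
  // plain input strides {1, 10} are scaled by op.strides() into
  // m_inputStrides == {3, 50}: one step along output dim 0 advances 3 input
  // elements, one step along output dim 1 advances 50.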
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // Map the first and last coefficient of the packet back to input indices.
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[0];
      inputIndices[1] += indices[1] * m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      // The source coefficients are contiguous: a single packet load suffices.
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      // Gather the coefficients one by one into an aligned buffer.
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
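  // Note (illustrative): with the {3, 5} example above, m_inputStrides[0] is
  // 3, so consecutive output coefficients sit 3 apart in the input and
  // packet() always takes the scalar gather path. The contiguous fast path
  // only fires when the inner stride is 1 and the packet does not cross an
  // inner-dimension boundary.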
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>()) +
        TensorOpCost::MulCost<Index>();
    if (vectorized) {
      compute_cost *= 2;  // packet() computes two indices instead of one
    }
    const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
        // Computation is not vectorized per se, but it is done once per packet.
        TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }
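  // Cost intuition: each output coefficient pays one integer div/mul/add per
  // dimension walked in srcCoeff(), plus the final multiply; the subexpression
  // is only asked for vectorized evaluation when the inner input stride is 1.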
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[NumDims-1];
    }
    return inputIndex;
  }
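  // Illustrative trace (ColMajor, output dims {4, 4}, m_outputStrides {1, 4},
  // m_inputStrides {3, 50}): srcCoeff(6) computes idx = 6 / 4 = 1,
  // inputIndex = 1 * 50 = 50, remainder 6 - 4 = 2, then
  // inputIndex += 2 * 3 = 56; i.e. output(2, 1) reads input(6, 5), whose
  // linear ColMajor index in the 10x20 input is 6 + 5 * 10 = 56.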
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};
// Eval as lvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef TensorEvaluator<const XprType, Device> Base;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }
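  // The lvalue specialization lets a strided expression sit on the left-hand
  // side of an assignment (illustrative sketch; dimensions chosen so that
  // ceil(10/2) x ceil(20/4) matches the right-hand side):
  //
  //   Eigen::Tensor<float, 2> out(10, 20);
  //   Eigen::Tensor<float, 2> in(5, 5);
  //   in.setRandom();
  //   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 4}};
  //   out.stride(strides) = in;  // writes in(i, j) to out(2 * i, 4 * j)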
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      // Contiguous destination: store the whole packet at once.
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    }
    else {
      // Scatter the packet coefficient by coefficient.
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
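  // writePacket mirrors the read path in packet(): the identical index
  // mapping selects between one contiguous packet store and a
  // per-coefficient scatter through coeffRef.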
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H