This Bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen.
View | Details | Raw Unified | Return to bug 359 | Differences between
and this patch

Collapse All | Expand All

(-)a/Eigen/src/Core/Assign.h (-1 / +1 lines)
Lines 43-59 private: Link Here
43
    PacketSize = packet_traits<typename Derived::Scalar>::size
43
    PacketSize = packet_traits<typename Derived::Scalar>::size
44
  };
44
  };
45
45
46
  enum {
46
  enum {
47
    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
47
    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
48
    MightVectorize = StorageOrdersAgree
48
    MightVectorize = StorageOrdersAgree
49
                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
49
                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
50
    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
50
    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
51
                       && int(DstIsAligned) && int(SrcIsAligned),
51
                       && int(DstIsAligned),
52
    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
52
    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
53
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
53
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
54
                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
54
                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
55
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
55
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
56
         so it's only good for large enough sizes. */
56
         so it's only good for large enough sizes. */
57
    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
57
    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
58
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
58
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
59
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
59
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
(-)a/Eigen/src/Core/products/CoeffBasedProduct.h (-3 / +4 lines)
Lines 300-316 struct product_coeff_impl<InnerVectorize Link Here
300
{
300
{
301
  typedef typename Lhs::PacketScalar Packet;
301
  typedef typename Lhs::PacketScalar Packet;
302
  typedef typename Lhs::Index Index;
302
  typedef typename Lhs::Index Index;
303
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
303
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
304
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
304
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
305
  {
305
  {
306
    Packet pres;
306
    Packet pres;
307
    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
307
    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
308
    product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
309
    res = predux(pres);
308
    res = predux(pres);
310
  }
309
  }
311
};
310
};
312
311
313
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
312
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
314
struct product_coeff_vectorized_dyn_selector
313
struct product_coeff_vectorized_dyn_selector
315
{
314
{
316
  typedef typename Lhs::Index Index;
315
  typedef typename Lhs::Index Index;
Lines 376-395 struct product_packet_impl<RowMajor, Unr Link Here
376
    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
375
    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
377
  }
376
  }
378
};
377
};
379
378
380
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
379
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
381
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
380
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
382
{
381
{
383
  typedef typename Lhs::Index Index;
382
  typedef typename Lhs::Index Index;
383
  enum {LhsLoadMode = Lhs::Flags  & ActualPacketAccessBit ? Aligned : Unaligned};
384
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
384
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
385
  {
385
  {
386
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
386
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
387
    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
387
    res =  pmadd(lhs.template packet<LhsLoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
388
  }
388
  }
389
};
389
};
390
390
391
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
391
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
392
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
392
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
393
{
393
{
394
  typedef typename Lhs::Index Index;
394
  typedef typename Lhs::Index Index;
395
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
395
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
Lines 397-415 struct product_packet_impl<RowMajor, 0, Link Here
397
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
397
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
398
  }
398
  }
399
};
399
};
400
400
401
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
401
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
402
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
402
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
403
{
403
{
404
  typedef typename Lhs::Index Index;
404
  typedef typename Lhs::Index Index;
405
  enum {LhsLoadMode = Lhs::Flags  & ActualPacketAccessBit ? Aligned : Unaligned};
405
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
406
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
406
  {
407
  {
407
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
408
    res = pmul(lhs.template packet<LhsLoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
408
  }
409
  }
409
};
410
};
410
411
411
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
412
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
412
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
413
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
413
{
414
{
414
  typedef typename Lhs::Index Index;
415
  typedef typename Lhs::Index Index;
415
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
416
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)

Return to bug 359