# HG changeset patch
# User Christoph Hertzberg
# Date 1382358357 -7200
# Node ID efe2382651d64815be0164851b33f55faf35833c
# Parent  0720fd0684a9463b62bf6682f65b7fed29f4f650
Bug 359: Allow vectorized products of (aligned result) = (Aligned matrix) * (unaligned expression);

diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -43,17 +43,17 @@ private:
     PacketSize = packet_traits<typename Derived::Scalar>::size
   };
 
   enum {
     StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
     MightVectorize = StorageOrdersAgree
                   && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
     MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
-                       && int(DstIsAligned) && int(SrcIsAligned),
+                       && int(DstIsAligned),
     MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
     MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
                        && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
          so it's only good for large enough sizes. */
     MaySliceVectorize  = MightVectorize && DstHasDirectAccess
                        && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
       /* slice vectorization can be slow, so we only want it if the slices are big, which is
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -300,17 +300,16 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::PacketScalar Packet;
   typedef typename Lhs::Index Index;
   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
   {
     Packet pres;
     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
-    product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
     res = predux(pres);
   }
 };
 
 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
 struct product_coeff_vectorized_dyn_selector
 {
   typedef typename Lhs::Index Index;
@@ -376,20 +375,21 @@ struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
     res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
   }
 };
 
 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
+  enum {LhsLoadMode = Lhs::Flags & ActualPacketAccessBit ? Aligned : Unaligned};
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
   {
     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
-    res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
+    res = pmadd(lhs.template packet<LhsLoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
   }
 };
 
 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
@@ -397,19 +397,20 @@ struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
   }
 };
 
 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
+  enum {LhsLoadMode = Lhs::Flags & ActualPacketAccessBit ? Aligned : Unaligned};
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
   {
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+    res = pmul(lhs.template packet<LhsLoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
   }
 };
 
 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
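
Editorial note, not part of the changeset: a minimal sketch of the kind of expression this patch targets, assuming Eigen 3.2-era headers; the names A, v, data, and the return-value check are illustrative only. The aligned fixed-size matrix supplies the packet loads on the left-hand side, while the deliberately unaligned Map on the right-hand side only needs per-coefficient access, which the coefficient-based product broadcasts with pset1.

// Illustrative sketch only (not part of the patch); assumes Eigen 3.2-era API.
#include <Eigen/Dense>

int main()
{
  float data[5] = {1.f, 2.f, 3.f, 4.f, 5.f};

  // Aligned, fixed-size lhs matrix: packet loads can use the Aligned mode.
  Eigen::Matrix4f A = Eigen::Matrix4f::Random();

  // Deliberately unaligned rhs expression: a Map starting at data + 1.
  Eigen::Map<const Eigen::Vector4f, Eigen::Unaligned> v(data + 1);

  // (aligned result) = (aligned matrix) * (unaligned expression)
  // With this patch the coefficient-based product may still take the
  // inner-vectorized path: packets are loaded from A, and v's coefficients
  // are broadcast, so v's alignment no longer disables vectorization.
  Eigen::Vector4f y = A * v;

  return y.sum() > 0.f ? 0 : 1;  // use the result so it is not optimized away
}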