# HG changeset patch # User Benoit Steiner # Date 1387306183 28800 # Node ID 58831f55ae353766d70b656254cf0dbdab885926 # Parent 2e597973fede8cd01abefd5857d1dc2b6a01371e Use SSE instructions to pack the rhs matrix whenever possible. diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1252,16 +1252,17 @@ EIGEN_DONT_INLINE void gemm_pack_rhs struct gemm_pack_rhs { + typedef typename packet_traits::type Packet; enum { PacketSize = packet_traits::size }; EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0); }; template EIGEN_DONT_INLINE void gemm_pack_rhs ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset) { @@ -1271,22 +1272,28 @@ EIGEN_DONT_INLINE void gemm_pack_rhs(&rhs[k*rhsStride + j2]); + pstoreu(blockB+count, cj.pconj(A)); + count += PacketSize; + } else { + const Scalar* b0 = &rhs[k*rhsStride + j2]; + blockB[count+0] = cj(b0[0]); + blockB[count+1] = cj(b0[1]); + if(nr==4) blockB[count+2] = cj(b0[2]); + if(nr==4) blockB[count+3] = cj(b0[3]); + count += nr; + } } // skip what we have after if(PanelMode) count += nr * (stride-offset-depth); } // copy the remaining columns one at a time (nr==1) for(Index j2=packet_cols; j2