Bugzilla – Attachment 408 Details for
Bug 717
Use vectorization to pack matrix operands
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This Bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
[patch]
patch against eigen 3.2
rhs_packing.patch (text/plain), 2.57 KB, created by Benoit Steiner on 2013-12-17 19:57:28 UTC (hide)
Description:
patch against eigen 3.2
Filename:
MIME Type:
Creator:
Benoit Steiner
Created:
2013-12-17 19:57:28 UTC
Size:
2.57 KB
patch
obsolete
># HG changeset patch ># User Benoit Steiner <benoit.steiner.goog@gmail.com> ># Date 1387306183 28800 ># Node ID 58831f55ae353766d70b656254cf0dbdab885926 ># Parent 2e597973fede8cd01abefd5857d1dc2b6a01371e >Use SSE instructions to pack the rhs matrix whenever possible. > >diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h >--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h >+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h >@@ -1252,16 +1252,17 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Sca > if(PanelMode) count += (stride-offset-depth); > } > } > > // this version is optimized for row major matrices > template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode> > struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> > { >+ typedef typename packet_traits<Scalar>::type Packet; > enum { PacketSize = packet_traits<Scalar>::size }; > EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0); > }; > > template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode> > EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> > ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset) > { >@@ -1271,22 +1272,28 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Sca > Index packet_cols = (cols/nr) * nr; > Index count = 0; > for(Index j2=0; j2<packet_cols; j2+=nr) > { > // skip what we have before > if(PanelMode) count += nr * offset; > for(Index k=0; k<depth; k++) > { >- const Scalar* b0 = &rhs[k*rhsStride + j2]; >- blockB[count+0] = cj(b0[0]); >- blockB[count+1] = cj(b0[1]); >- if(nr==4) blockB[count+2] = cj(b0[2]); >- if(nr==4) blockB[count+3] = cj(b0[3]); >- count += nr; >+ if (nr == PacketSize) { >+ Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); >+ pstoreu(blockB+count, 
cj.pconj(A)); >+ count += PacketSize; >+ } else { >+ const Scalar* b0 = &rhs[k*rhsStride + j2]; >+ blockB[count+0] = cj(b0[0]); >+ blockB[count+1] = cj(b0[1]); >+ if(nr==4) blockB[count+2] = cj(b0[2]); >+ if(nr==4) blockB[count+3] = cj(b0[3]); >+ count += nr; >+ } > } > // skip what we have after > if(PanelMode) count += nr * (stride-offset-depth); > } > // copy the remaining columns one at a time (nr==1) > for(Index j2=packet_cols; j2<cols; ++j2) > { > if(PanelMode) count += offset;
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on bug 717: 408