
Eigen/src/Core/products/GeneralBlockPanelKernel.h (-6 / +13 lines)

--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1252,16 +1252,17 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Sca
     if(PanelMode) count += (stride-offset-depth);
   }
 }
 
 // this version is optimized for row major matrices
 template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
 struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
 {
+  typedef typename packet_traits<Scalar>::type Packet;
   enum { PacketSize = packet_traits<Scalar>::size };
   EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
 };
 
 template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
 EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
   ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
 {
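Note (not part of the patch): the hunk above only adds the Packet typedef next to PacketSize, so that the row-major packing loop changed in the next hunk can use Eigen's internal packet primitives. As a rough standalone sketch of what these names provide, assuming float data and Eigen's internal packet_traits / ploadu / pstoreu helpers (internal API, subject to change):

#include <Eigen/Core>
#include <iostream>

int main()
{
  // Packet is the SIMD register type matching the scalar (e.g. __m128 for float with SSE);
  // PacketSize is how many scalars it holds (4 in that case, 1 if vectorization is disabled).
  typedef Eigen::internal::packet_traits<float>::type Packet;
  const int PacketSize = Eigen::internal::packet_traits<float>::size;

  float src[64], dst[64];
  for(int i=0; i<64; ++i) { src[i] = float(i); dst[i] = 0.f; }

  // One unaligned load plus one unaligned store moves PacketSize scalars at once,
  // which is what the patched kernel does per depth step when nr==PacketSize.
  Packet a = Eigen::internal::ploadu<Packet>(src);
  Eigen::internal::pstoreu(dst, a);

  std::cout << "copied " << PacketSize << " scalars in one packet\n";
  return 0;
}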
@@ -1271,22 +1272,28 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Sca
   Index packet_cols = (cols/nr) * nr;
   Index count = 0;
   for(Index j2=0; j2<packet_cols; j2+=nr)
   {
     // skip what we have before
     if(PanelMode) count += nr * offset;
     for(Index k=0; k<depth; k++)
     {
-      const Scalar* b0 = &rhs[k*rhsStride + j2];
-                blockB[count+0] = cj(b0[0]);
-                blockB[count+1] = cj(b0[1]);
-      if(nr==4) blockB[count+2] = cj(b0[2]);
-      if(nr==4) blockB[count+3] = cj(b0[3]);
-      count += nr;
+      if (nr == PacketSize) {
+        Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
+        pstoreu(blockB+count, cj.pconj(A));
+        count += PacketSize;
+      } else {
+        const Scalar* b0 = &rhs[k*rhsStride + j2];
+                  blockB[count+0] = cj(b0[0]);
+                  blockB[count+1] = cj(b0[1]);
+        if(nr==4) blockB[count+2] = cj(b0[2]);
+        if(nr==4) blockB[count+3] = cj(b0[3]);
+        count += nr;
+      }
     }
     // skip what we have after
     if(PanelMode) count += nr * (stride-offset-depth);
   }
   // copy the remaining columns one at a time (nr==1)
   for(Index j2=packet_cols; j2<cols; ++j2)
   {
     if(PanelMode) count += offset;
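For reference (again not part of the patch), here is a simplified standalone sketch of the two packing strategies the rewritten inner loop chooses between, assuming real-valued float data so the conjugation wrapper cj can be dropped; the helper names pack_scalar and pack_packet are invented for the illustration. It only demonstrates that, when nr equals PacketSize, the packet path fills blockB with exactly the same panel layout as the scalar path:

#include <Eigen/Core>
#include <cassert>

typedef Eigen::internal::packet_traits<float>::type Packet;
enum { PacketSize = Eigen::internal::packet_traits<float>::size };

// Old path: copy the nr entries of each row segment one scalar at a time.
static void pack_scalar(float* blockB, const float* rhs, int rhsStride, int depth, int j2, int nr)
{
  int count = 0;
  for(int k=0; k<depth; ++k)
  {
    const float* b0 = &rhs[k*rhsStride + j2];
    for(int n=0; n<nr; ++n) blockB[count+n] = b0[n];
    count += nr;
  }
}

// New fast path: one unaligned packet load/store per depth step, valid only when
// the panel width nr equals PacketSize so the number of copied scalars is unchanged.
static void pack_packet(float* blockB, const float* rhs, int rhsStride, int depth, int j2)
{
  int count = 0;
  for(int k=0; k<depth; ++k)
  {
    Packet A = Eigen::internal::ploadu<Packet>(&rhs[k*rhsStride + j2]);
    Eigen::internal::pstoreu(blockB+count, A);
    count += PacketSize;
  }
}

int main()
{
  const int rhsStride = 32, depth = 8, j2 = 4;
  float rhs[rhsStride*depth];
  for(int i=0; i<rhsStride*depth; ++i) rhs[i] = float(i);

  float a[depth*PacketSize], b[depth*PacketSize];
  pack_scalar(a, rhs, rhsStride, depth, j2, PacketSize);
  pack_packet(b, rhs, rhsStride, depth, j2);
  for(int i=0; i<depth*PacketSize; ++i) assert(a[i] == b[i]); // identical packed panel
  return 0;
}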
