This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 931 | Differences between
and this patch

Collapse All | Expand All

(-)a/Eigen/src/Core/products/GeneralBlockPanelKernel.h (-25 / +38 lines)
Lines 41-82 void computeProductBlockingSizes(SizeTyp Link Here
41
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
41
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
42
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
42
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
43
  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
43
  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
44
  // at the register level. For vectorization purposes, these small vertical panels are unpacked,
44
  // at the register level. For vectorization purposes, these small vertical panels are unpacked,
45
  // i.e., each coefficient is replicated to fit a packet. This small vertical panel has to
45
  // i.e., each coefficient is replicated to fit a packet. This small vertical panel has to
46
  // stay in L1 cache.
46
  // stay in L1 cache.
47
47
48
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
48
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
49
  enum {
50
    kdiv = KcFactor * 2 * Traits::nr
51
         * Traits::RhsProgress * sizeof(RhsScalar),
52
    mr = gebp_traits<LhsScalar,RhsScalar>::mr,
53
    mr_mask = (0xffffffff/mr)*mr
54
  };
55
49
56
//   k = std::min<SizeType>(k, l1/kdiv);
50
  // First, we compute kc
57
//   SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
51
  SizeType kdiv = KcFactor * 2 * Traits::nr * Traits::RhsProgress * sizeof(RhsScalar);
58
//   if(_m<m) m = _m & mr_mask;
52
  SizeType kc = CacheSizes::L1() / kdiv;
59
  
53
60
  // In unit tests we do not want to use extra large matrices,
54
  // This optimization seems specific to x86/x86-64. It harms performance on a
61
  // so we reduce the block size to check the blocking strategy is not flawed
55
  // Nexus 4 (ARM) device where the optimal value of kc is around 512 or 1024.
62
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
56
  #if EIGEN_ARCH_i386_OR_x86_64
63
//   k = std::min<SizeType>(k,240);
57
    #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
64
//   n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
58
      // Limit block size in tests to cover blocking logic without using huge matrices
65
//   m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
59
      kc = std::min<SizeType>(kc,24);
66
  
60
    #else
67
  k = std::min<SizeType>(k,sizeof(LhsScalar)<=4 ? 360 : 240);
61
      kc = std::min<SizeType>(kc,sizeof(LhsScalar)<=4 ? 360 : 240);
68
  n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
62
    #endif
69
  m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
63
  #endif
70
#else
64
71
  k = std::min<SizeType>(k,24);
65
  // Clamp k to kc
72
  n = std::min<SizeType>(n,384/sizeof(RhsScalar));
66
  k = std::min<SizeType>(k, kc);
73
  m = std::min<SizeType>(m,384/sizeof(RhsScalar));
67
74
#endif
68
  // Next, compute mc and nc, using the clamped value of k, so that if k is very small,
69
  // we can go bigger in the n and m dimensions.
70
  SizeType mc = CacheSizes::TopLevel() / (4 * sizeof(LhsScalar) * k);
71
  mc -= mc % Traits::mr;
72
  m = std::min<SizeType>(m, mc);
73
  SizeType nc = CacheSizes::TopLevel() / (4 * sizeof(RhsScalar) * k);
74
  nc -= nc % Traits::nr;
75
  n = std::min<SizeType>(n, nc);
76
77
  // Similar optimization as above - seems specific to x86/x86-64.
78
  #if EIGEN_ARCH_i386_OR_x86_64
79
    #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
80
      // Limit block size in tests to cover blocking logic without using huge matrices
81
      m = std::min<SizeType>(m,384/sizeof(RhsScalar));
82
      n = std::min<SizeType>(n,384/sizeof(RhsScalar));
83
    #else
84
      m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
85
      n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
86
    #endif
87
  #endif
75
}
88
}
76
89
77
template<typename LhsScalar, typename RhsScalar, typename SizeType>
90
template<typename LhsScalar, typename RhsScalar, typename SizeType>
78
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
91
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
79
{
92
{
80
  computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
93
  computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
81
}
94
}
82
95

Return to bug 931