This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 931 | Differences between
and this patch

Collapse All | Expand All

(-)a/Eigen/src/Core/products/GeneralBlockPanelKernel.h (-25 / +34 lines)
Lines 41-82 void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n) Link Here
41
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
41
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
42
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
42
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
43
  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
43
  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
44
  // at the register level. For vectorization purpose, these small vertical panels are unpacked,
44
  // at the register level. For vectorization purpose, these small vertical panels are unpacked,
45
  // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
45
  // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
46
  // stay in L1 cache.
46
  // stay in L1 cache.
47
47
48
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
48
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
49
  enum {
50
    kdiv = KcFactor * 2 * Traits::nr
51
         * Traits::RhsProgress * sizeof(RhsScalar),
52
    mr = gebp_traits<LhsScalar,RhsScalar>::mr,
53
    mr_mask = (0xffffffff/mr)*mr
54
  };
55
49
56
//   k = std::min<SizeType>(k, l1/kdiv);
50
  // First, we compute kc
57
//   SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
51
  SizeType kdiv = KcFactor * 2 * Traits::nr * Traits::RhsProgress * sizeof(RhsScalar);
58
//   if(_m<m) m = _m & mr_mask;
52
  SizeType kc = CacheSizes::L1() / kdiv;
59
  
53
60
  // In unit tests we do not want to use extra large matrices,
54
  #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
61
  // so we reduce the block size to check the blocking strategy is not flawed
55
    // Limit block size in tests to cover blocking logic without using huge matrices
62
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
56
    kc = std::min<SizeType>(kc,24);
63
//   k = std::min<SizeType>(k,240);
57
  #elif EIGEN_ARCH_i386_OR_x86_64
64
//   n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
58
    // This optimization seems specific to x86/x86-64. It harms performance on a
65
//   m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
59
    // Nexus 4 (ARM) device where the optimal value of kc is around 512 or 1024.
66
  
60
    kc = std::min<SizeType>(kc,sizeof(LhsScalar)<=4 ? 360 : 240);
67
  k = std::min<SizeType>(k,sizeof(LhsScalar)<=4 ? 360 : 240);
61
  #endif
68
  n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
62
69
  m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
63
  // Clamp k to kc
70
#else
64
  k = std::min<SizeType>(k, kc);
71
  k = std::min<SizeType>(k,24);
65
72
  n = std::min<SizeType>(n,384/sizeof(RhsScalar));
66
  // Next, compute mc and nc, using the clamped value of k, so that if k is very small,
73
  m = std::min<SizeType>(m,384/sizeof(RhsScalar));
67
  // we can go bigger in the n and m dimensions.
74
#endif
68
  SizeType mc = CacheSizes::TopLevel() / (4 * sizeof(LhsScalar) * k);
69
  mc -= mc % Traits::mr;
70
  m = std::min<SizeType>(m, mc);
71
  SizeType nc = CacheSizes::TopLevel() / (4 * sizeof(RhsScalar) * k);
72
  nc -= nc % Traits::nr;
73
  n = std::min<SizeType>(n, nc);
74
75
  #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
76
    // Limit block size in tests to cover blocking logic without using huge matrices
77
    m = std::min<SizeType>(m,384/sizeof(RhsScalar));
78
    n = std::min<SizeType>(n,384/sizeof(RhsScalar));
79
  #elif EIGEN_ARCH_i386_OR_x86_64
80
    // Similar optimization as above, seems specific to x86/x86-64.
81
    m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
82
    n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
83
  #endif
75
}
84
}
76
85
77
template<typename LhsScalar, typename RhsScalar, typename SizeType>
86
template<typename LhsScalar, typename RhsScalar, typename SizeType>
78
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
87
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
79
{
88
{
80
  computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
89
  computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
81
}
90
}
82
91

Return to bug 931