Lines 13-65
Link Here
|
13 |
|
13 |
|
14 |
namespace Eigen { |
14 |
namespace Eigen { |
15 |
|
15 |
|
16 |
namespace internal { |
16 |
namespace internal { |
17 |
|
17 |
|
18 |
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false> |
18 |
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false> |
19 |
class gebp_traits; |
19 |
class gebp_traits; |
20 |
|
20 |
|
21 |
|
|
|
22 |
/** \internal
  * Selects between a value and its fallback.
  *
  * \param a candidate value (e.g. the result of a cache-size query)
  * \param b fallback used when the candidate is not usable
  * \returns \a a when it is strictly positive, \a b otherwise.
  */
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
{
  // A strictly positive candidate is taken as-is.
  if(a>0)
    return a;
  // Zero or negative: fall back to the supplied default.
  return b;
}
27 |
|
28 |
/** \internal */ |
29 |
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0) |
30 |
{ |
31 |
static std::ptrdiff_t m_l1CacheSize = 0; |
32 |
static std::ptrdiff_t m_l2CacheSize = 0; |
33 |
if(m_l2CacheSize==0) |
34 |
{ |
35 |
m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024); |
36 |
m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024); |
37 |
} |
38 |
|
39 |
if(action==SetAction) |
40 |
{ |
41 |
// set the cpu cache size and cache all block sizes from a global cache size in byte |
42 |
eigen_internal_assert(l1!=0 && l2!=0); |
43 |
m_l1CacheSize = *l1; |
44 |
m_l2CacheSize = *l2; |
45 |
} |
46 |
else if(action==GetAction) |
47 |
{ |
48 |
eigen_internal_assert(l1!=0 && l2!=0); |
49 |
*l1 = m_l1CacheSize; |
50 |
*l2 = m_l2CacheSize; |
51 |
} |
52 |
else |
53 |
{ |
54 |
eigen_internal_assert(false); |
55 |
} |
56 |
} |
57 |
|
58 |
/** \brief Computes the blocking parameters for a m x k times k x n matrix product |
21 |
/** \brief Computes the blocking parameters for a m x k times k x n matrix product |
59 |
* |
22 |
* |
60 |
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. |
23 |
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. |
61 |
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. |
24 |
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. |
62 |
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. |
25 |
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. |
63 |
* |
26 |
* |
64 |
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, |
27 |
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, |
65 |
* this function computes the blocking size parameters along the respective dimensions |
28 |
* this function computes the blocking size parameters along the respective dimensions |
Lines 76-103
void computeProductBlockingSizes(SizeTyp
Link Here
|
76 |
EIGEN_UNUSED_VARIABLE(n); |
39 |
EIGEN_UNUSED_VARIABLE(n); |
77 |
// Explanations: |
40 |
// Explanations: |
78 |
// Let's recall that the product algorithms form kc x nc horizontal panels B' on the rhs and |
41 |
// Let's recall that the product algorithms form kc x nc horizontal panels B' on the rhs and |
79 |
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed |
42 |
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed |
80 |
// per kc x nr vertical small panels where nr is the blocking size along the n dimension |
43 |
// per kc x nr vertical small panels where nr is the blocking size along the n dimension |
81 |
// at the register level. For vectorization purpose, these small vertical panels are unpacked, |
44 |
// at the register level. For vectorization purpose, these small vertical panels are unpacked, |
82 |
// e.g., each coefficient is replicated to fit a packet. This small vertical panel has to |
45 |
// e.g., each coefficient is replicated to fit a packet. This small vertical panel has to |
83 |
// stay in L1 cache. |
46 |
// stay in L1 cache. |
84 |
std::ptrdiff_t l1, l2; |
|
|
85 |
|
47 |
|
86 |
typedef gebp_traits<LhsScalar,RhsScalar> Traits; |
48 |
typedef gebp_traits<LhsScalar,RhsScalar> Traits; |
87 |
enum { |
49 |
enum { |
88 |
kdiv = KcFactor * 2 * Traits::nr |
50 |
kdiv = KcFactor * 2 * Traits::nr |
89 |
* Traits::RhsProgress * sizeof(RhsScalar), |
51 |
* Traits::RhsProgress * sizeof(RhsScalar), |
90 |
mr = gebp_traits<LhsScalar,RhsScalar>::mr, |
52 |
mr = gebp_traits<LhsScalar,RhsScalar>::mr, |
91 |
mr_mask = (0xffffffff/mr)*mr |
53 |
mr_mask = (0xffffffff/mr)*mr |
92 |
}; |
54 |
}; |
93 |
|
55 |
|
94 |
manage_caching_sizes(GetAction, &l1, &l2); |
|
|
95 |
|
96 |
// k = std::min<SizeType>(k, l1/kdiv); |
56 |
// k = std::min<SizeType>(k, l1/kdiv); |
97 |
// SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0; |
57 |
// SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0; |
98 |
// if(_m<m) m = _m & mr_mask; |
58 |
// if(_m<m) m = _m & mr_mask; |
99 |
|
59 |
|
100 |
// In unit tests we do not want to use extra large matrices, |
60 |
// In unit tests we do not want to use extra large matrices, |
101 |
// so we reduce the block size to check the blocking strategy is not flawed |
61 |
// so we reduce the block size to check the blocking strategy is not flawed |
102 |
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS |
62 |
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS |
103 |
// k = std::min<SizeType>(k,240); |
63 |
// k = std::min<SizeType>(k,240); |
Lines 1848-1886
EIGEN_DONT_INLINE void gemm_pack_rhs<Sca
Link Here
|
1848 |
count += 1; |
1808 |
count += 1; |
1849 |
} |
1809 |
} |
1850 |
if(PanelMode) count += stride-offset-depth; |
1810 |
if(PanelMode) count += stride-offset-depth; |
1851 |
} |
1811 |
} |
1852 |
} |
1812 |
} |
1853 |
|
1813 |
|
1854 |
} // end namespace internal |
1814 |
} // end namespace internal |
1855 |
|
1815 |
|
1856 |
/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. |
|
|
1857 |
* \sa setCpuCacheSize */ |
1858 |
inline std::ptrdiff_t l1CacheSize() |
1859 |
{ |
1860 |
std::ptrdiff_t l1, l2; |
1861 |
internal::manage_caching_sizes(GetAction, &l1, &l2); |
1862 |
return l1; |
1863 |
} |
1864 |
|
1865 |
/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. |
1866 |
* \sa setCpuCacheSize */ |
1867 |
inline std::ptrdiff_t l2CacheSize() |
1868 |
{ |
1869 |
std::ptrdiff_t l1, l2; |
1870 |
internal::manage_caching_sizes(GetAction, &l1, &l2); |
1871 |
return l2; |
1872 |
} |
1873 |
|
1874 |
/** Set the cpu L1 and L2 cache sizes (in bytes). |
1875 |
* These values are use to adjust the size of the blocks |
1876 |
* for the algorithms working per blocks. |
1877 |
* |
1878 |
* \sa computeProductBlockingSizes */ |
1879 |
inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) |
1880 |
{ |
1881 |
internal::manage_caching_sizes(SetAction, &l1, &l2); |
1882 |
} |
1883 |
|
1884 |
} // end namespace Eigen |
1816 |
} // end namespace Eigen |
1885 |
|
1817 |
|
1886 |
#endif // EIGEN_GENERAL_BLOCK_PANEL_H |
1818 |
#endif // EIGEN_GENERAL_BLOCK_PANEL_H |