This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 931 | Differences between
and this patch

Collapse All | Expand All

(-)a/Eigen/Core (+1 lines)
Lines 280-295 using std::ptrdiff_t; Link Here
280
  */
280
  */
281
281
282
#include "src/Core/util/Constants.h"
282
#include "src/Core/util/Constants.h"
283
#include "src/Core/util/Meta.h"
283
#include "src/Core/util/Meta.h"
284
#include "src/Core/util/ForwardDeclarations.h"
284
#include "src/Core/util/ForwardDeclarations.h"
285
#include "src/Core/util/StaticAssert.h"
285
#include "src/Core/util/StaticAssert.h"
286
#include "src/Core/util/XprHelper.h"
286
#include "src/Core/util/XprHelper.h"
287
#include "src/Core/util/Memory.h"
287
#include "src/Core/util/Memory.h"
288
#include "src/Core/util/CacheSizes.h"
288
289
289
#include "src/Core/NumTraits.h"
290
#include "src/Core/NumTraits.h"
290
#include "src/Core/MathFunctions.h"
291
#include "src/Core/MathFunctions.h"
291
#include "src/Core/GenericPacketMath.h"
292
#include "src/Core/GenericPacketMath.h"
292
293
293
#if defined EIGEN_VECTORIZE_AVX
294
#if defined EIGEN_VECTORIZE_AVX
294
  // Use AVX for floats and doubles, SSE for integers
295
  // Use AVX for floats and doubles, SSE for integers
295
  #include "src/Core/arch/SSE/PacketMath.h"
296
  #include "src/Core/arch/SSE/PacketMath.h"
(-)a/Eigen/src/Core/products/GeneralBlockPanelKernel.h (-68 lines)
Lines 13-65 Link Here
13
13
14
namespace Eigen { 
14
namespace Eigen { 
15
  
15
  
16
namespace internal {
16
namespace internal {
17
17
18
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
18
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
19
class gebp_traits;
19
class gebp_traits;
20
20
21
22
/** \internal
  * Picks between a candidate value and a fallback.
  * \param a candidate value (the callers in this file pass a queried cache size)
  * \param b fallback used when \a a is not strictly positive
  * \returns \a b if \a a <= 0, and \a a otherwise. */
23
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
24
{
25
  return a<=0 ? b : a;
26
}
27
28
/** \internal
  * Reads or overwrites the L1 and L2 cache sizes kept in two function-local statics.
  *
  * Whenever the stored L2 value is 0 on entry, both values are (re)initialized from
  * queryL1CacheSize() and queryTopLevelCacheSize(); a non-positive query result is
  * replaced by 8*1024 and 1*1024*1024 respectively.
  *
  * \param action SetAction copies \c *l1 and \c *l2 into the stored values;
  *               GetAction copies the stored values into \c *l1 and \c *l2;
  *               any other value ends in eigen_internal_assert(false).
  * \param l1 source (SetAction) or destination (GetAction) for the L1 size; dereferenced in both cases
  * \param l2 source (SetAction) or destination (GetAction) for the L2 size; dereferenced in both cases
  */
29
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
30
{
31
  static std::ptrdiff_t m_l1CacheSize = 0;
32
  static std::ptrdiff_t m_l2CacheSize = 0;
33
  // a stored L2 size of 0 is treated as "not initialized yet"; note that this also
  // holds after a SetAction call that stored 0 for L2
  if(m_l2CacheSize==0)
34
  {
35
    m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
36
    // the value stored as "L2" comes from the top-level cache query
    m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
37
  }
38
  
39
  if(action==SetAction)
40
  {
41
    // overwrite the stored L1 and L2 cache sizes with the caller-provided values
42
    eigen_internal_assert(l1!=0 && l2!=0);
43
    m_l1CacheSize = *l1;
44
    m_l2CacheSize = *l2;
45
  }
46
  else if(action==GetAction)
47
  {
48
    // report the stored L1 and L2 cache sizes through the two output pointers
    eigen_internal_assert(l1!=0 && l2!=0);
49
    *l1 = m_l1CacheSize;
50
    *l2 = m_l2CacheSize;
51
  }
52
  else
53
  {
54
    // only SetAction and GetAction are handled
    eigen_internal_assert(false);
55
  }
56
}
57
58
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
21
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
59
  *
22
  *
60
  * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
23
  * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
61
  * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
24
  * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
62
  * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
25
  * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
63
  *
26
  *
64
  * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
27
  * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
65
  * this function computes the blocking size parameters along the respective dimensions
28
  * this function computes the blocking size parameters along the respective dimensions
Lines 76-103 void computeProductBlockingSizes(SizeTyp Link Here
76
  EIGEN_UNUSED_VARIABLE(n);
39
  EIGEN_UNUSED_VARIABLE(n);
77
  // Explanations:
40
  // Explanations:
78
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
41
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
79
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
42
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
80
  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
43
  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
81
  // at the register level. For vectorization purpose, these small vertical panels are unpacked,
44
  // at the register level. For vectorization purpose, these small vertical panels are unpacked,
82
  // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
45
  // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
83
  // stay in L1 cache.
46
  // stay in L1 cache.
84
  std::ptrdiff_t l1, l2;
85
47
86
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
48
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
87
  enum {
49
  enum {
88
    kdiv = KcFactor * 2 * Traits::nr
50
    kdiv = KcFactor * 2 * Traits::nr
89
         * Traits::RhsProgress * sizeof(RhsScalar),
51
         * Traits::RhsProgress * sizeof(RhsScalar),
90
    mr = gebp_traits<LhsScalar,RhsScalar>::mr,
52
    mr = gebp_traits<LhsScalar,RhsScalar>::mr,
91
    mr_mask = (0xffffffff/mr)*mr
53
    mr_mask = (0xffffffff/mr)*mr
92
  };
54
  };
93
55
94
  manage_caching_sizes(GetAction, &l1, &l2);
95
96
//   k = std::min<SizeType>(k, l1/kdiv);
56
//   k = std::min<SizeType>(k, l1/kdiv);
97
//   SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
57
//   SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
98
//   if(_m<m) m = _m & mr_mask;
58
//   if(_m<m) m = _m & mr_mask;
99
  
59
  
100
  // In unit tests we do not want to use extra large matrices,
60
  // In unit tests we do not want to use extra large matrices,
101
  // so we reduce the block size to check the blocking strategy is not flawed
61
  // so we reduce the block size to check the blocking strategy is not flawed
102
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
62
#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
103
//   k = std::min<SizeType>(k,240);
63
//   k = std::min<SizeType>(k,240);
Lines 1848-1886 EIGEN_DONT_INLINE void gemm_pack_rhs<Sca Link Here
1848
      count += 1;
1808
      count += 1;
1849
    }
1809
    }
1850
    if(PanelMode) count += stride-offset-depth;
1810
    if(PanelMode) count += stride-offset-depth;
1851
  }
1811
  }
1852
}
1812
}
1853
1813
1854
} // end namespace internal
1814
} // end namespace internal
1855
1815
1856
/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
1857
  * \sa setCpuCacheSizes */
1858
inline std::ptrdiff_t l1CacheSize()
1859
{
1860
  // manage_caching_sizes reports both sizes at once; l2 is fetched but not used here
  std::ptrdiff_t l1, l2;
1861
  internal::manage_caching_sizes(GetAction, &l1, &l2);
1862
  return l1;
1863
}
1864
1865
/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
1866
  * \sa setCpuCacheSizes */
1867
inline std::ptrdiff_t l2CacheSize()
1868
{
1869
  // manage_caching_sizes reports both sizes at once; l1 is fetched but not used here
  std::ptrdiff_t l1, l2;
1870
  internal::manage_caching_sizes(GetAction, &l1, &l2);
1871
  return l2;
1872
}
1873
1874
/** Set the cpu L1 and L2 cache sizes (in bytes).
1875
  * These values are used to adjust the size of the blocks
1876
  * for the algorithms working per blocks.
1877
  *
1878
  * \sa computeProductBlockingSizes */
1879
inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
1880
{
1881
  // the by-value parameters are passed by address because manage_caching_sizes takes pointers
  internal::manage_caching_sizes(SetAction, &l1, &l2);
1882
}
1883
1884
} // end namespace Eigen
1816
} // end namespace Eigen
1885
1817
1886
#endif // EIGEN_GENERAL_BLOCK_PANEL_H
1818
#endif // EIGEN_GENERAL_BLOCK_PANEL_H
(-)a/Eigen/src/Core/products/Parallelizer.h (-2 / +1 lines)
Lines 44-61 inline void manage_multi_threading(Actio Link Here
44
44
45
}
45
}
46
46
47
/** Must be called first when calling Eigen from multiple threads */
47
/** Must be called first when calling Eigen from multiple threads */
48
inline void initParallel()
48
inline void initParallel()
49
{
49
{
50
  int nbt;
50
  int nbt;
51
  internal::manage_multi_threading(GetAction, &nbt);
51
  internal::manage_multi_threading(GetAction, &nbt);
52
  std::ptrdiff_t l1, l2;
52
  CacheSizes::EnsureInitialized();
53
  internal::manage_caching_sizes(GetAction, &l1, &l2);
54
}
53
}
55
54
56
/** \returns the max number of threads reserved for Eigen
55
/** \returns the max number of threads reserved for Eigen
57
  * \sa setNbThreads */
56
  * \sa setNbThreads */
58
inline int nbThreads()
57
inline int nbThreads()
59
{
58
{
60
  int ret;
59
  int ret;
61
  internal::manage_multi_threading(GetAction, &ret);
60
  internal::manage_multi_threading(GetAction, &ret);
(-)a/Eigen/src/Core/products/TriangularSolverMatrix.h (-2 / +1 lines)
Lines 72-89 EIGEN_DONT_INLINE void triangular_solve_ Link Here
72
72
73
    conj_if<Conjugate> conj;
73
    conj_if<Conjugate> conj;
74
    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
74
    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
75
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
75
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
76
    gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
76
    gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
77
77
78
    // the goal here is to subdivide the Rhs panels such that we keep some cache
78
    // the goal here is to subdivide the Rhs panels such that we keep some cache
79
    // coherence when accessing the rhs elements
79
    // coherence when accessing the rhs elements
80
    std::ptrdiff_t l1, l2;
80
    std::ptrdiff_t l2 = CacheSizes::L2();
81
    manage_caching_sizes(GetAction, &l1, &l2);
82
    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
81
    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
83
    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
82
    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
84
83
85
    for(Index k2=IsLower ? 0 : size;
84
    for(Index k2=IsLower ? 0 : size;
86
        IsLower ? k2<size : k2>0;
85
        IsLower ? k2<size : k2>0;
87
        IsLower ? k2+=kc : k2-=kc)
86
        IsLower ? k2+=kc : k2-=kc)
88
    {
87
    {
89
      const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
88
      const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
(-)a/Eigen/src/Core/util/CacheSizes.h (+133 lines)
Line 0 Link Here
1
// This file is part of Eigen, a lightweight C++ template library
2
// for linear algebra.
3
//
4
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
6
//
7
// This Source Code Form is subject to the terms of the Mozilla
8
// Public License v. 2.0. If a copy of the MPL was not distributed
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11
#ifndef EIGEN_CACHE_SIZES_H
12
#define EIGEN_CACHE_SIZES_H
13
14
namespace Eigen {
15
16
// Table of cpu cache sizes, one entry per cache level, stored in a single shared
// instance that is created on first use (see Singleton()). All public access goes
// through static member functions.
//
// The table holds at most MaxNumberOfLevels entries. After construction or Set(),
// it holds at least two: missing L1/L2 entries are filled with default values.
// Sizes are presumably expressed in bytes (the default L1 is 16 * kilobyte).
class CacheSizes
17
{
18
private:
19
  // capacity of m_levels
  static const size_t MaxNumberOfLevels = 4;
20
  // number of populated entries at the front of m_levels
  size_t m_numberOfLevels;
21
  // m_levels[0] is used as L1, m_levels[1] as L2, and so on; unused entries are 0
  size_t m_levels[MaxNumberOfLevels];
22
23
  // Empties the table and zeroes every slot.
  void clear()
24
  {
25
    m_numberOfLevels = 0;
26
    for (size_t i = 0; i < MaxNumberOfLevels; i++) {
27
      m_levels[i] = 0;
28
    }
29
  }
30
31
  // Appends one level. The capacity bound is checked by eigen_assert only.
  void push_back(size_t size)
32
  {
33
    eigen_assert(m_numberOfLevels < MaxNumberOfLevels);
34
    m_levels[m_numberOfLevels++] = size;
35
  }
36
37
  // Fills the table from internal::queryCacheSizes(), keeping only the levels
  // reported as strictly positive, then adds defaults for missing L1/L2.
  // Skipped levels leave no gap: if l1 is not positive but l2 is, the l2 value
  // ends up in slot 0.
  CacheSizes()
38
  {
39
    clear();
40
    int l1, l2, l3;
41
    internal::queryCacheSizes(l1, l2, l3);
42
    if (l1 > 0) push_back(l1);
43
    if (l2 > 0) push_back(l2);
44
    if (l3 > 0) push_back(l3);
45
    ensureAtLeastL1AndL2();
46
    assertValid();
47
  }
48
49
  // Pads the table so that it holds at least two levels.
  void ensureAtLeastL1AndL2()
50
  {
51
    // We have code relying on there always existing L1 and L2 caches,
52
    // which is indeed the case in practice in most hardware, so for now let's
53
    // ensure that this assumption is always true, by adding reasonable
54
    // default values for L1 and L2 if they are missing
55
    if (m_numberOfLevels == 0) {
56
      push_back(16 * kilobyte); // default L1
57
    }
58
    // also reached right after the default L1 was added above
    if (m_numberOfLevels == 1) {
59
      push_back(16 * m_levels[0]); // default L2
60
    }
61
  }
62
63
  // Checks the table invariants; every check goes through eigen_assert.
  void assertValid()
64
  {
65
    // Must have at least two levels, and all cache sizes must be nonzero
66
    // and in nondecreasing order
67
    eigen_assert(m_numberOfLevels >= 2);
68
    size_t prev = 0;
69
    for (size_t i = 0; i != m_numberOfLevels; i++) {
70
      eigen_assert(m_levels[i]);
71
      eigen_assert(m_levels[i] >= prev);
72
      prev = m_levels[i];
73
    }
74
    // prev is only read inside eigen_assert; presumably this macro avoids an
    // unused-variable warning when assertions are disabled - TODO confirm
    EIGEN_ONLY_USED_FOR_DEBUG(prev);
75
  }
76
77
  // Returns the shared instance, a function-local static constructed on first call.
  static CacheSizes& Singleton()
78
  {
79
    static CacheSizes uniqueInstance;
80
    return uniqueInstance;
81
  }
82
83
public:
84
85
  // Forces construction of the shared instance; the result is discarded.
  static void EnsureInitialized()
86
  {
87
    Singleton();
88
  }
89
90
  // Replaces the whole table with the given sizes, ordered from L1 upwards.
  // number_of_levels is clamped to MaxNumberOfLevels, so extra entries are ignored;
  // levels is read for every index below the clamped count.
  // Zero entries are rejected by eigen_assert only. Defaults are added when fewer
  // than two levels are given.
  static void Set(size_t number_of_levels, const size_t* levels)
91
  {
92
    if (number_of_levels > MaxNumberOfLevels) {
93
      number_of_levels = MaxNumberOfLevels;
94
    }
95
    Singleton().clear();
96
    for (size_t i = 0; i < number_of_levels; i++) {
97
      eigen_assert(levels[i] > 0);
98
      Singleton().push_back(levels[i]);
99
    }
100
    Singleton().ensureAtLeastL1AndL2();
101
    Singleton().assertValid();
102
  }
103
104
  // Returns the number of populated levels.
  static size_t NumberOfLevels()
105
  {
106
    return Singleton().m_numberOfLevels;
107
  }
108
109
  // Returns the size of the given cache level. The index is 1-based:
  // Level(1) is L1. The valid range is checked by eigen_assert only.
  static size_t Level(size_t index)
110
  {
111
    eigen_assert(index >= 1 && index <= NumberOfLevels());
112
    return Singleton().m_levels[index - 1];
113
  }
114
115
  // Returns the first table entry.
  static size_t L1()
116
  {
117
    return Singleton().m_levels[0];
118
  }
119
120
  // Returns the second table entry.
  static size_t L2()
121
  {
122
    return Singleton().m_levels[1];
123
  }
124
125
  // Returns the last populated table entry.
  static size_t TopLevel()
126
  {
127
    return Singleton().m_levels[Singleton().m_numberOfLevels - 1];
128
  }
129
};
130
131
}
132
133
#endif // EIGEN_CACHE_SIZES_H
(-)a/Eigen/src/Core/util/Constants.h (+3 lines)
Lines 8-23 Link Here
8
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// Public License v. 2.0. If a copy of the MPL was not distributed
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
10
11
#ifndef EIGEN_CONSTANTS_H
11
#ifndef EIGEN_CONSTANTS_H
12
#define EIGEN_CONSTANTS_H
12
#define EIGEN_CONSTANTS_H
13
13
14
namespace Eigen {
14
namespace Eigen {
15
15
16
const size_t megabyte = 1 << 20;
17
const size_t kilobyte = 1 << 10;
18
16
/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is
19
/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is
17
  * stored in some runtime variable.
20
  * stored in some runtime variable.
18
  *
21
  *
19
  * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
22
  * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
20
  */
23
  */
21
const int Dynamic = -1;
24
const int Dynamic = -1;
22
25
23
/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value
26
/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value
(-)a/Eigen/src/Core/util/Memory.h (-18 lines)
Lines 1007-1037 inline void queryCacheSizes(int& l1, int Link Here
1007
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
1007
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
1008
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
1008
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
1009
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
1009
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
1010
  #else
1010
  #else
1011
  l1 = l2 = l3 = -1;
1011
  l1 = l2 = l3 = -1;
1012
  #endif
1012
  #endif
1013
}
1013
}
1014
1014
1015
/** \internal
1016
 * \returns the size in Bytes of the L1 data cache, as reported by queryCacheSizes() */
1017
inline int queryL1CacheSize()
1018
{
1019
  // l2 and l3 are only needed to satisfy the queryCacheSizes signature
  int l1(-1), l2, l3;
1020
  queryCacheSizes(l1,l2,l3);
1021
  return l1;
1022
}
1023
1024
/** \internal
1025
 * \returns the larger of the L2 and L3 cache sizes (in Bytes) reported by queryCacheSizes() */
1026
inline int queryTopLevelCacheSize()
1027
{
1028
  // l1 is only needed to satisfy the queryCacheSizes signature
  int l1, l2(-1), l3(-1);
1029
  queryCacheSizes(l1,l2,l3);
1030
  // parentheses around std::max presumably guard against a max() macro - TODO confirm
  return (std::max)(l2,l3);
1031
}
1032
1033
} // end namespace internal
1015
} // end namespace internal
1034
1016
1035
} // end namespace Eigen
1017
} // end namespace Eigen
1036
1018
1037
#endif // EIGEN_MEMORY_H
1019
#endif // EIGEN_MEMORY_H
(-)a/test/product_large.cpp (-5 / +8 lines)
Lines 33-53 void test_product_large() Link Here
33
    // test deferred resizing in Matrix::operator=
33
    // test deferred resizing in Matrix::operator=
34
    MatrixXf a = MatrixXf::Random(10,4), b = MatrixXf::Random(4,10), c = a;
34
    MatrixXf a = MatrixXf::Random(10,4), b = MatrixXf::Random(4,10), c = a;
35
    VERIFY_IS_APPROX((a = a * b), (c * b).eval());
35
    VERIFY_IS_APPROX((a = a * b), (c * b).eval());
36
  }
36
  }
37
37
38
  {
38
  {
39
    // check the functions to setup blocking sizes compile and do not segfault
39
    // check the functions to setup blocking sizes compile and do not segfault
40
    // FIXME check they do what they are supposed to do !!
40
    // FIXME check they do what they are supposed to do !!
41
    std::ptrdiff_t l1 = internal::random<int>(10000,20000);
41
    size_t l1 = internal::random<int>(10000,20000);
42
    std::ptrdiff_t l2 = internal::random<int>(1000000,2000000);
42
    size_t l2 = internal::random<int>(1000000,2000000);
43
    setCpuCacheSizes(l1,l2);
43
    size_t sizes[] = {l1, l2};
44
    VERIFY(l1==l1CacheSize());
44
    CacheSizes::Set(2, sizes);
45
    VERIFY(l2==l2CacheSize());
45
    VERIFY(CacheSizes::NumberOfLevels() == 2);
46
    VERIFY(CacheSizes::L1() == l1);
47
    VERIFY(CacheSizes::L2() == l2);
48
46
    std::ptrdiff_t k1 = internal::random<int>(10,100)*16;
49
    std::ptrdiff_t k1 = internal::random<int>(10,100)*16;
47
    std::ptrdiff_t m1 = internal::random<int>(10,100)*16;
50
    std::ptrdiff_t m1 = internal::random<int>(10,100)*16;
48
    std::ptrdiff_t n1 = internal::random<int>(10,100)*16;
51
    std::ptrdiff_t n1 = internal::random<int>(10,100)*16;
49
    // only makes sure it compiles fine
52
    // only makes sure it compiles fine
50
    internal::computeProductBlockingSizes<float,float>(k1,m1,n1);
53
    internal::computeProductBlockingSizes<float,float>(k1,m1,n1);
51
  }
54
  }
52
55
53
  {
56
  {
(-)a/unsupported/Eigen/CXX11/src/MeasureCacheSizes/MeasureCacheSizes.h (-3 lines)
Lines 28-46 Link Here
28
namespace Eigen {
28
namespace Eigen {
29
namespace internal {
29
namespace internal {
30
30
31
using std::size_t;
31
using std::size_t;
32
using std::ptrdiff_t;
32
using std::ptrdiff_t;
33
using std::uint8_t;
33
using std::uint8_t;
34
using std::uint64_t;
34
using std::uint64_t;
35
35
36
const size_t megabyte = 1 << 20;
37
const size_t kilobyte = 1 << 10;
38
39
// We must repeat memory accesses enough times to give
36
// We must repeat memory accesses enough times to give
40
// the CPU a good chance to have our stuff in cache for
37
// the CPU a good chance to have our stuff in cache for
41
// most of the measurements, so that the effect of caches
38
// most of the measurements, so that the effect of caches
42
// can be measured. Since our accesses are by large memcpy's,
39
// can be measured. Since our accesses are by large memcpy's,
43
// the number of times that each cache line is touched is
40
// the number of times that each cache line is touched is
44
// roughly equal to the number of times that each byte is touched.
41
// roughly equal to the number of times that each byte is touched.
45
const int minimum_times_accessed_each_byte = 8;
42
const int minimum_times_accessed_each_byte = 8;
46
43

Return to bug 931