Bugzilla – Attachment 517 Details for
Bug 931
Fix tracking and use of cache sizes
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
[patch]
Part 1: refactor the cache sizes tracking
CacheSizes (text/plain), 14.20 KB, created by
Benoit Jacob
on 2015-01-19 22:03:46 UTC
(
hide
)
Description:
Part 1: refactor the cache sizes tracking
Filename:
MIME Type:
Creator:
Benoit Jacob
Created:
2015-01-19 22:03:46 UTC
Size:
14.20 KB
patch
obsolete
># HG changeset patch ># Parent 9442a1057e9edf3c18bd7597efa183e7ea3f138d > >diff --git a/Eigen/Core b/Eigen/Core >--- a/Eigen/Core >+++ b/Eigen/Core >@@ -280,16 +280,17 @@ using std::ptrdiff_t; > */ > > #include "src/Core/util/Constants.h" > #include "src/Core/util/Meta.h" > #include "src/Core/util/ForwardDeclarations.h" > #include "src/Core/util/StaticAssert.h" > #include "src/Core/util/XprHelper.h" > #include "src/Core/util/Memory.h" >+#include "src/Core/util/CacheSizes.h" > > #include "src/Core/NumTraits.h" > #include "src/Core/MathFunctions.h" > #include "src/Core/GenericPacketMath.h" > > #if defined EIGEN_VECTORIZE_AVX > // Use AVX for floats and doubles, SSE for integers > #include "src/Core/arch/SSE/PacketMath.h" >diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h >--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h >+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h >@@ -13,53 +13,16 @@ > > namespace Eigen { > > namespace internal { > > template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false> > class gebp_traits; > >- >-/** \internal \returns b if a<=0, and returns a otherwise. */ >-inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b) >-{ >- return a<=0 ? 
b : a; >-} >- >-/** \internal */ >-inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0) >-{ >- static std::ptrdiff_t m_l1CacheSize = 0; >- static std::ptrdiff_t m_l2CacheSize = 0; >- if(m_l2CacheSize==0) >- { >- m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024); >- m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024); >- } >- >- if(action==SetAction) >- { >- // set the cpu cache size and cache all block sizes from a global cache size in byte >- eigen_internal_assert(l1!=0 && l2!=0); >- m_l1CacheSize = *l1; >- m_l2CacheSize = *l2; >- } >- else if(action==GetAction) >- { >- eigen_internal_assert(l1!=0 && l2!=0); >- *l1 = m_l1CacheSize; >- *l2 = m_l2CacheSize; >- } >- else >- { >- eigen_internal_assert(false); >- } >-} >- > /** \brief Computes the blocking parameters for a m x k times k x n matrix product > * > * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. > * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. > * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. > * > * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, > * this function computes the blocking size parameters along the respective dimensions >@@ -76,28 +39,25 @@ void computeProductBlockingSizes(SizeTyp > EIGEN_UNUSED_VARIABLE(n); > // Explanations: > // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and > // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed > // per kc x nr vertical small panels where nr is the blocking size along the n dimension > // at the register level. For vectorization purpose, these small vertical panels are unpacked, > // e.g., each coefficient is replicated to fit a packet. 
This small vertical panel has to > // stay in L1 cache. >- std::ptrdiff_t l1, l2; > > typedef gebp_traits<LhsScalar,RhsScalar> Traits; > enum { > kdiv = KcFactor * 2 * Traits::nr > * Traits::RhsProgress * sizeof(RhsScalar), > mr = gebp_traits<LhsScalar,RhsScalar>::mr, > mr_mask = (0xffffffff/mr)*mr > }; > >- manage_caching_sizes(GetAction, &l1, &l2); >- > // k = std::min<SizeType>(k, l1/kdiv); > // SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0; > // if(_m<m) m = _m & mr_mask; > > // In unit tests we do not want to use extra large matrices, > // so we reduce the block size to check the blocking strategy is not flawed > #ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS > // k = std::min<SizeType>(k,240); >@@ -1848,39 +1808,11 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Sca > count += 1; > } > if(PanelMode) count += stride-offset-depth; > } > } > > } // end namespace internal > >-/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. >- * \sa setCpuCacheSize */ >-inline std::ptrdiff_t l1CacheSize() >-{ >- std::ptrdiff_t l1, l2; >- internal::manage_caching_sizes(GetAction, &l1, &l2); >- return l1; >-} >- >-/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. >- * \sa setCpuCacheSize */ >-inline std::ptrdiff_t l2CacheSize() >-{ >- std::ptrdiff_t l1, l2; >- internal::manage_caching_sizes(GetAction, &l1, &l2); >- return l2; >-} >- >-/** Set the cpu L1 and L2 cache sizes (in bytes). >- * These values are use to adjust the size of the blocks >- * for the algorithms working per blocks. 
>- * >- * \sa computeProductBlockingSizes */ >-inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) >-{ >- internal::manage_caching_sizes(SetAction, &l1, &l2); >-} >- > } // end namespace Eigen > > #endif // EIGEN_GENERAL_BLOCK_PANEL_H >diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h >--- a/Eigen/src/Core/products/Parallelizer.h >+++ b/Eigen/src/Core/products/Parallelizer.h >@@ -44,18 +44,17 @@ inline void manage_multi_threading(Actio > > } > > /** Must be call first when calling Eigen from multiple threads */ > inline void initParallel() > { > int nbt; > internal::manage_multi_threading(GetAction, &nbt); >- std::ptrdiff_t l1, l2; >- internal::manage_caching_sizes(GetAction, &l1, &l2); >+ CacheSizes::EnsureInitialized(); > } > > /** \returns the max number of threads reserved for Eigen > * \sa setNbThreads */ > inline int nbThreads() > { > int ret; > internal::manage_multi_threading(GetAction, &ret); >diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h >--- a/Eigen/src/Core/products/TriangularSolverMatrix.h >+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h >@@ -72,18 +72,17 @@ EIGEN_DONT_INLINE void triangular_solve_ > > conj_if<Conjugate> conj; > gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; > gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; > gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs; > > // the goal here is to subdivise the Rhs panels such that we keep some cache > // coherence when accessing the rhs elements >- std::ptrdiff_t l1, l2; >- manage_caching_sizes(GetAction, &l1, &l2); >+ std::ptrdiff_t l2 = CacheSizes::L2(); > Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0; > subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr); > > for(Index k2=IsLower ? 0 : size; > IsLower ? 
k2<size : k2>0; > IsLower ? k2+=kc : k2-=kc) > { > const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc); >diff --git a/Eigen/src/Core/util/CacheSizes.h b/Eigen/src/Core/util/CacheSizes.h >new file mode 100644 >--- /dev/null >+++ b/Eigen/src/Core/util/CacheSizes.h >@@ -0,0 +1,133 @@ >+// This file is part of Eigen, a lightweight C++ template library >+// for linear algebra. >+// >+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> >+// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com> >+// >+// This Source Code Form is subject to the terms of the Mozilla >+// Public License v. 2.0. If a copy of the MPL was not distributed >+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. >+ >+#ifndef EIGEN_CACHE_SIZES_H >+#define EIGEN_CACHE_SIZES_H >+ >+namespace Eigen { >+ >+class CacheSizes >+{ >+private: >+ static const size_t MaxNumberOfLevels = 4; >+ size_t m_numberOfLevels; >+ size_t m_levels[MaxNumberOfLevels]; >+ >+ void clear() >+ { >+ m_numberOfLevels = 0; >+ for (size_t i = 0; i < MaxNumberOfLevels; i++) { >+ m_levels[i] = 0; >+ } >+ } >+ >+ void push_back(size_t size) >+ { >+ eigen_assert(m_numberOfLevels < MaxNumberOfLevels); >+ m_levels[m_numberOfLevels++] = size; >+ } >+ >+ CacheSizes() >+ { >+ clear(); >+ int l1, l2, l3; >+ internal::queryCacheSizes(l1, l2, l3); >+ if (l1 > 0) push_back(l1); >+ if (l2 > 0) push_back(l2); >+ if (l3 > 0) push_back(l3); >+ ensureAtLeastL1AndL2(); >+ assertValid(); >+ } >+ >+ void ensureAtLeastL1AndL2() >+ { >+ // We have code relying on there always existing L1 and L2 caches, >+ // which is indeed the case in practice in most hardware, so for now let's >+ // ensure that this assumption is always true, by adding reasonable >+ // default values for L1 and L2 if they are missing >+ if (m_numberOfLevels == 0) { >+ push_back(16 * kilobyte); // default L1 >+ } >+ if (m_numberOfLevels == 1) { >+ push_back(16 * m_levels[0]); // default L2 >+ } >+ } >+ >+ void assertValid() >+ { >+ // 
Must have at least two levels, and all cache sizes must be nonzero >+ // and in nondecreasing order >+ eigen_assert(m_numberOfLevels >= 2); >+ size_t prev = 0; >+ for (size_t i = 0; i != m_numberOfLevels; i++) { >+ eigen_assert(m_levels[i]); >+ eigen_assert(m_levels[i] >= prev); >+ prev = m_levels[i]; >+ } >+ EIGEN_ONLY_USED_FOR_DEBUG(prev); >+ } >+ >+ static CacheSizes& Singleton() >+ { >+ static CacheSizes uniqueInstance; >+ return uniqueInstance; >+ } >+ >+public: >+ >+ static void EnsureInitialized() >+ { >+ Singleton(); >+ } >+ >+ static void Set(size_t number_of_levels, const size_t* levels) >+ { >+ if (number_of_levels > MaxNumberOfLevels) { >+ number_of_levels = MaxNumberOfLevels; >+ } >+ Singleton().clear(); >+ for (size_t i = 0; i < number_of_levels; i++) { >+ eigen_assert(levels[i] > 0); >+ Singleton().push_back(levels[i]); >+ } >+ Singleton().ensureAtLeastL1AndL2(); >+ Singleton().assertValid(); >+ } >+ >+ static size_t NumberOfLevels() >+ { >+ return Singleton().m_numberOfLevels; >+ } >+ >+ static size_t Level(size_t index) >+ { >+ eigen_assert(index >= 1 && index <= NumberOfLevels()); >+ return Singleton().m_levels[index - 1]; >+ } >+ >+ static size_t L1() >+ { >+ return Singleton().m_levels[0]; >+ } >+ >+ static size_t L2() >+ { >+ return Singleton().m_levels[1]; >+ } >+ >+ static size_t TopLevel() >+ { >+ return Singleton().m_levels[Singleton().m_numberOfLevels - 1]; >+ } >+}; >+ >+} >+ >+#endif // EIGEN_CACHE_SIZES_H >diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h >--- a/Eigen/src/Core/util/Constants.h >+++ b/Eigen/src/Core/util/Constants.h >@@ -8,16 +8,19 @@ > // Public License v. 2.0. If a copy of the MPL was not distributed > // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
> > #ifndef EIGEN_CONSTANTS_H > #define EIGEN_CONSTANTS_H > > namespace Eigen { > >+const size_t megabyte = 1 << 20; >+const size_t kilobyte = 1 << 10; >+ > /** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is > * stored in some runtime variable. > * > * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix. > */ > const int Dynamic = -1; > > /** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value >diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h >--- a/Eigen/src/Core/util/Memory.h >+++ b/Eigen/src/Core/util/Memory.h >@@ -1007,31 +1007,13 @@ inline void queryCacheSizes(int& l1, int > // ||cpuid_is_vendor(abcd,"SiS SiS SiS ") > // ||cpuid_is_vendor(abcd,"UMC UMC UMC ") > // ||cpuid_is_vendor(abcd,"NexGenDriven") > #else > l1 = l2 = l3 = -1; > #endif > } > >-/** \internal >- * \returns the size in Bytes of the L1 data cache */ >-inline int queryL1CacheSize() >-{ >- int l1(-1), l2, l3; >- queryCacheSizes(l1,l2,l3); >- return l1; >-} >- >-/** \internal >- * \returns the size in Bytes of the L2 or L3 cache if this later is present */ >-inline int queryTopLevelCacheSize() >-{ >- int l1, l2(-1), l3(-1); >- queryCacheSizes(l1,l2,l3); >- return (std::max)(l2,l3); >-} >- > } // end namespace internal > > } // end namespace Eigen > > #endif // EIGEN_MEMORY_H >diff --git a/test/product_large.cpp b/test/product_large.cpp >--- a/test/product_large.cpp >+++ b/test/product_large.cpp >@@ -33,21 +33,24 @@ void test_product_large() > // test deferred resizing in Matrix::operator= > MatrixXf a = MatrixXf::Random(10,4), b = MatrixXf::Random(4,10), c = a; > VERIFY_IS_APPROX((a = a * b), (c * b).eval()); > } > > { > // check the functions to setup blocking sizes compile and do not segfault > // FIXME check they do what they are supposed to do !! 
>- std::ptrdiff_t l1 = internal::random<int>(10000,20000); >- std::ptrdiff_t l2 = internal::random<int>(1000000,2000000); >- setCpuCacheSizes(l1,l2); >- VERIFY(l1==l1CacheSize()); >- VERIFY(l2==l2CacheSize()); >+ size_t l1 = internal::random<int>(10000,20000); >+ size_t l2 = internal::random<int>(1000000,2000000); >+ size_t sizes[] = {l1, l2}; >+ CacheSizes::Set(2, sizes); >+ VERIFY(CacheSizes::NumberOfLevels() == 2); >+ VERIFY(CacheSizes::L1() == l1); >+ VERIFY(CacheSizes::L2() == l2); >+ > std::ptrdiff_t k1 = internal::random<int>(10,100)*16; > std::ptrdiff_t m1 = internal::random<int>(10,100)*16; > std::ptrdiff_t n1 = internal::random<int>(10,100)*16; > // only makes sure it compiles fine > internal::computeProductBlockingSizes<float,float>(k1,m1,n1); > } > > { >diff --git a/unsupported/Eigen/CXX11/src/MeasureCacheSizes/MeasureCacheSizes.h b/unsupported/Eigen/CXX11/src/MeasureCacheSizes/MeasureCacheSizes.h >--- a/unsupported/Eigen/CXX11/src/MeasureCacheSizes/MeasureCacheSizes.h >+++ b/unsupported/Eigen/CXX11/src/MeasureCacheSizes/MeasureCacheSizes.h >@@ -28,19 +28,16 @@ > namespace Eigen { > namespace internal { > > using std::size_t; > using std::ptrdiff_t; > using std::uint8_t; > using std::uint64_t; > >-const size_t megabyte = 1 << 20; >-const size_t kilobyte = 1 << 10; >- > // We must repeat memory accesses enough times to give > // the CPU a good chance to have our stuff in cache for > // most of the measurements, so that the effect of caches > // can be measured. Since our accesses are by large memcpy's, > // the number of times that each cache line is touched is > // roughly equal to the number of times that each byte is touched. > const int minimum_times_accessed_each_byte = 8; >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 931
: 517 |
518
|
519
|
520