This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 931 | Differences between
and this patch

Collapse All | Expand All

(-)a/Eigen/src/Core/util/CacheSizes.h (-1 / +205 lines)
Lines 1-23 Link Here
1
// This file is part of Eigen, a lightweight C++ template library
1
// This file is part of Eigen, a lightweight C++ template library
2
// for linear algebra.
2
// for linear algebra.
3
//
3
//
4
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
4
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
5
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
5
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
6
//
6
//
7
// This Source Code Form is subject to the terms of the Mozilla
7
// This Source Code Form is subject to the terms of the Mozilla
8
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// Public License v. 2.0. If a copy of the MPL was not distributed
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
10
11
#ifndef EIGEN_CACHE_SIZES_H
11
#ifndef EIGEN_CACHE_SIZES_H
12
#define EIGEN_CACHE_SIZES_H
12
#define EIGEN_CACHE_SIZES_H
13
13
14
namespace Eigen {
14
namespace Eigen {
15
15
16
// EIGEN_CPUID(abcd,func,id): runs the x86 `cpuid` instruction with `func` in EAX
// and `id` in ECX, and stores the resulting EAX, EBX, ECX, EDX into abcd[0..3].
// The macro stays undefined when EIGEN_NO_CPUID is defined, or when none of the
// compiler/architecture combinations below applies.
#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
       // 32-bit x86 built with PIC: EBX is swapped with the register that
       // receives abcd[1] before and after the cpuid instruction.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1" \
                               : "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) \
                               : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
       // 64-bit x86 built with PIC: same RBX swap as above. In theory this only
       // matters with recent gcc and the medium or large code model, but the
       // code model cannot be detected and the xchg overhead is negligible.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1" \
                               : "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) \
                               : "0" (func), "2" (id));
#    else
       // x86 or x86_64 without PIC: EBX can be used as a plain output operand.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid" \
                               : "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) \
                               : "0" (func), "2" (id) );
#    endif
#  elif (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
     // MSVC newer than version 1500: rely on the __cpuidex intrinsic.
#    define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#  endif
#endif
38
39
namespace internal {
40
41
#ifdef EIGEN_CPUID
42
43
/** \internal
 * Tells whether the register dump \a abcd (EAX, EBX, ECX, EDX) carries the
 * vendor signature \a vendor, whose three words are compared against
 * EBX, EDX and ECX, in that order. */
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  if(abcd[1]!=vendor[0])
    return false;
  if(abcd[3]!=vendor[1])
    return false;
  return abcd[2]==vendor[2];
}
47
48
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
49
{
50
  int abcd[4];
51
  l1 = l2 = l3 = 0;
52
  int cache_id = 0;
53
  int cache_type = 0;
54
  do {
55
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
56
    EIGEN_CPUID(abcd,0x4,cache_id);
57
    cache_type  = (abcd[0] & 0x0F) >> 0;
58
    if(cache_type==1||cache_type==3) // data or unified cache
59
    {
60
      int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]
61
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
62
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
63
      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
64
      int sets        = (abcd[2]);                    // C[31:0]
65
66
      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
67
68
      switch(cache_level)
69
      {
70
        case 1: l1 = cache_size; break;
71
        case 2: l2 = cache_size; break;
72
        case 3: l3 = cache_size; break;
73
        default: break;
74
      }
75
    }
76
    cache_id++;
77
  } while(cache_type>0 && cache_id<16);
78
}
79
80
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
81
{
82
  int abcd[4];
83
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
84
  l1 = l2 = l3 = 0;
85
  EIGEN_CPUID(abcd,0x00000002,0);
86
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
87
  bool check_for_p2_core2 = false;
88
  for(int i=0; i<14; ++i)
89
  {
90
    switch(bytes[i])
91
    {
92
      case 0x0A: l1 = 8; break;   // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
93
      case 0x0C: l1 = 16; break;  // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
94
      case 0x0E: l1 = 24; break;  // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
95
      case 0x10: l1 = 16; break;  // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
96
      case 0x15: l1 = 16; break;  // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
97
      case 0x2C: l1 = 32; break;  // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
98
      case 0x30: l1 = 32; break;  // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
99
      case 0x60: l1 = 16; break;  // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
100
      case 0x66: l1 = 8; break;   // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
101
      case 0x67: l1 = 16; break;  // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
102
      case 0x68: l1 = 32; break;  // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
103
      case 0x1A: l2 = 96; break;   // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
104
      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
105
      case 0x23: l3 = 1024; break;   // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
106
      case 0x25: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
107
      case 0x29: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
108
      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
109
      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
110
      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
111
      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
112
      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
113
      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
114
      case 0x40: l2 = 0; break;   // no integrated L2 cache (P6 core) or L3 cache (P4 core)
115
      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
116
      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
117
      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
118
      case 0x44: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
119
      case 0x45: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
120
      case 0x46: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
121
      case 0x47: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
122
      case 0x48: l2 = 3072; break;   // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
123
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
124
      case 0x4A: l3 = 6144; break;   // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
125
      case 0x4B: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
126
      case 0x4C: l3 = 12288; break;   // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
127
      case 0x4D: l3 = 16384; break;   // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
128
      case 0x4E: l2 = 6144; break;   // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
129
      case 0x78: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
130
      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
131
      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
132
      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
133
      case 0x7C: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
134
      case 0x7D: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
135
      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
136
      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
137
      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
138
      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
139
      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
140
      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
141
      case 0x84: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
142
      case 0x85: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
143
      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
144
      case 0x87: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
145
      case 0x88: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
146
      case 0x89: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
147
      case 0x8A: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
148
      case 0x8D: l3 = 3072; break;   // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
149
150
      default: break;
151
    }
152
  }
153
  if(check_for_p2_core2 && l2 == l3)
154
    l3 = 0;
155
  l1 *= 1024;
156
  l2 *= 1024;
157
  l3 *= 1024;
158
}
159
160
inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
161
{
162
  if(max_std_funcs>=4)
163
    queryCacheSizes_intel_direct(l1,l2,l3);
164
  else
165
    queryCacheSizes_intel_codes(l1,l2,l3);
166
}
167
168
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
169
{
170
  int abcd[4];
171
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
172
  EIGEN_CPUID(abcd,0x80000005,0);
173
  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
174
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
175
  EIGEN_CPUID(abcd,0x80000006,0);
176
  l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
177
  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
178
}
179
#endif
180
181
/** \internal
 * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively.
 *
 * When EIGEN_CPUID is available, the vendor is identified through CPUID
 * function 0 and the matching vendor-specific query is used; unknown vendors
 * go through the Intel path. When EIGEN_CPUID is not defined, all three
 * outputs are set to -1. */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4] = {0, 0, 0, 0};
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  // Function 0 returns the highest supported standard function in EAX;
  // EBX, EDX and ECX hold the vendor string checked by cpuid_is_vendor().
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  // No way to query the CPU: report all sizes as unknown.
  l1 = l2 = l3 = -1;
  #endif
}
217
218
} // end namespace internal
219
16
class CacheSizes
220
class CacheSizes
17
{
221
{
18
private:
222
private:
19
  static const size_t MaxNumberOfLevels = 4;
223
  static const size_t MaxNumberOfLevels = 4;
20
  size_t m_numberOfLevels;
224
  size_t m_numberOfLevels;
21
  size_t m_levels[MaxNumberOfLevels];
225
  size_t m_levels[MaxNumberOfLevels];
22
226
23
  void clear()
227
  void clear()
(-)a/Eigen/src/Core/util/Memory.h (-206 lines)
Lines 803-1019 public: Link Here
803
  }
803
  }
804
804
805
  /** Releases \a p by forwarding it to internal::aligned_free(); the element
    * count argument is ignored. */
  void deallocate(pointer p, size_type /*num*/) { internal::aligned_free(p); }
809
};
809
};
810
810
811
//---------- Cache sizes ----------
812
813
// EIGEN_CPUID(abcd,func,id): runs the x86 `cpuid` instruction with `func` in EAX
// and `id` in ECX, and stores the resulting EAX, EBX, ECX, EDX into abcd[0..3].
// The macro stays undefined when EIGEN_NO_CPUID is defined, or when none of the
// compiler/architecture combinations below applies.
#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
       // 32-bit x86 built with PIC: EBX is swapped with the register that
       // receives abcd[1] before and after the cpuid instruction.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1" \
                               : "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) \
                               : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
       // 64-bit x86 built with PIC: same RBX swap as above. In theory this only
       // matters with recent gcc and the medium or large code model, but the
       // code model cannot be detected and the xchg overhead is negligible.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1" \
                               : "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) \
                               : "0" (func), "2" (id));
#    else
       // x86 or x86_64 without PIC: EBX can be used as a plain output operand.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid" \
                               : "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) \
                               : "0" (func), "2" (id) );
#    endif
#  elif (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
     // MSVC newer than version 1500: rely on the __cpuidex intrinsic.
#    define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#  endif
#endif
835
836
namespace internal {
837
838
#ifdef EIGEN_CPUID
839
840
/** \internal
 * Tells whether the register dump \a abcd (EAX, EBX, ECX, EDX) carries the
 * vendor signature \a vendor, whose three words are compared against
 * EBX, EDX and ECX, in that order. */
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  if(abcd[1]!=vendor[0])
    return false;
  if(abcd[3]!=vendor[1])
    return false;
  return abcd[2]==vendor[2];
}
844
845
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
846
{
847
  int abcd[4];
848
  l1 = l2 = l3 = 0;
849
  int cache_id = 0;
850
  int cache_type = 0;
851
  do {
852
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
853
    EIGEN_CPUID(abcd,0x4,cache_id);
854
    cache_type  = (abcd[0] & 0x0F) >> 0;
855
    if(cache_type==1||cache_type==3) // data or unified cache
856
    {
857
      int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]
858
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
859
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
860
      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
861
      int sets        = (abcd[2]);                    // C[31:0]
862
863
      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
864
865
      switch(cache_level)
866
      {
867
        case 1: l1 = cache_size; break;
868
        case 2: l2 = cache_size; break;
869
        case 3: l3 = cache_size; break;
870
        default: break;
871
      }
872
    }
873
    cache_id++;
874
  } while(cache_type>0 && cache_id<16);
875
}
876
877
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
878
{
879
  int abcd[4];
880
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
881
  l1 = l2 = l3 = 0;
882
  EIGEN_CPUID(abcd,0x00000002,0);
883
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
884
  bool check_for_p2_core2 = false;
885
  for(int i=0; i<14; ++i)
886
  {
887
    switch(bytes[i])
888
    {
889
      case 0x0A: l1 = 8; break;   // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
890
      case 0x0C: l1 = 16; break;  // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
891
      case 0x0E: l1 = 24; break;  // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
892
      case 0x10: l1 = 16; break;  // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
893
      case 0x15: l1 = 16; break;  // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
894
      case 0x2C: l1 = 32; break;  // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
895
      case 0x30: l1 = 32; break;  // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
896
      case 0x60: l1 = 16; break;  // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
897
      case 0x66: l1 = 8; break;   // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
898
      case 0x67: l1 = 16; break;  // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
899
      case 0x68: l1 = 32; break;  // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
900
      case 0x1A: l2 = 96; break;   // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
901
      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
902
      case 0x23: l3 = 1024; break;   // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
903
      case 0x25: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
904
      case 0x29: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
905
      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
906
      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
907
      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
908
      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
909
      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
910
      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
911
      case 0x40: l2 = 0; break;   // no integrated L2 cache (P6 core) or L3 cache (P4 core)
912
      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
913
      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
914
      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
915
      case 0x44: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
916
      case 0x45: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
917
      case 0x46: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
918
      case 0x47: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
919
      case 0x48: l2 = 3072; break;   // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
920
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
921
      case 0x4A: l3 = 6144; break;   // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
922
      case 0x4B: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
923
      case 0x4C: l3 = 12288; break;   // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
924
      case 0x4D: l3 = 16384; break;   // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
925
      case 0x4E: l2 = 6144; break;   // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
926
      case 0x78: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
927
      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
928
      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
929
      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
930
      case 0x7C: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
931
      case 0x7D: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
932
      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
933
      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
934
      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
935
      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
936
      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
937
      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
938
      case 0x84: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
939
      case 0x85: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
940
      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
941
      case 0x87: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
942
      case 0x88: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
943
      case 0x89: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
944
      case 0x8A: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
945
      case 0x8D: l3 = 3072; break;   // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
946
947
      default: break;
948
    }
949
  }
950
  if(check_for_p2_core2 && l2 == l3)
951
    l3 = 0;
952
  l1 *= 1024;
953
  l2 *= 1024;
954
  l3 *= 1024;
955
}
956
957
inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
958
{
959
  if(max_std_funcs>=4)
960
    queryCacheSizes_intel_direct(l1,l2,l3);
961
  else
962
    queryCacheSizes_intel_codes(l1,l2,l3);
963
}
964
965
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
966
{
967
  int abcd[4];
968
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
969
  EIGEN_CPUID(abcd,0x80000005,0);
970
  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
971
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
972
  EIGEN_CPUID(abcd,0x80000006,0);
973
  l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
974
  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
975
}
976
#endif
977
978
/** \internal
 * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively.
 *
 * When EIGEN_CPUID is available, the vendor is identified through CPUID
 * function 0 and the matching vendor-specific query is used; unknown vendors
 * go through the Intel path. When EIGEN_CPUID is not defined, all three
 * outputs are set to -1. */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4] = {0, 0, 0, 0};
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  // Function 0 returns the highest supported standard function in EAX;
  // EBX, EDX and ECX hold the vendor string checked by cpuid_is_vendor().
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  // No way to query the CPU: report all sizes as unknown.
  l1 = l2 = l3 = -1;
  #endif
}
1014
1015
} // end namespace internal
1016
1017
} // end namespace Eigen
811
} // end namespace Eigen
1018
812
1019
#endif // EIGEN_MEMORY_H
813
#endif // EIGEN_MEMORY_H

Return to bug 931