Check the HTT bit before counting logical threads
Skip counting logical threads for Intel processors if the HTT bit is 0, which indicates there is only a single logical processor.
* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting logical threads if the HTT bit is 0.
* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
(index_cpu_HTT): Likewise.
(reg_HTT): Likewise.
This commit is contained in:
parent
eb2c88c7c8
commit
7c08d791ee
|
@ -1,3 +1,11 @@
|
||||||
|
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
|
||||||
|
logical threads if the HTT bit is 0.
|
||||||
|
* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
|
||||||
|
(index_cpu_HTT): Likewise.
|
||||||
|
(reg_HTT): Likewise.
|
||||||
|
|
||||||
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
|
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
[BZ #20115]
|
[BZ #20115]
|
||||||
|
|
|
@ -506,99 +506,105 @@ init_cacheinfo (void)
|
||||||
shared = core;
|
shared = core;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Figure out the number of logical threads that share the
|
/* A value of 0 for the HTT bit indicates there is only a single
|
||||||
highest cache level. */
|
logical processor. */
|
||||||
if (max_cpuid >= 4)
|
if (HAS_CPU_FEATURE (HTT))
|
||||||
{
|
{
|
||||||
unsigned int family = GLRO(dl_x86_cpu_features).family;
|
/* Figure out the number of logical threads that share the
|
||||||
unsigned int model = GLRO(dl_x86_cpu_features).model;
|
highest cache level. */
|
||||||
|
if (max_cpuid >= 4)
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
/* Query until desired cache level is enumerated. */
|
|
||||||
do
|
|
||||||
{
|
{
|
||||||
__cpuid_count (4, i++, eax, ebx, ecx, edx);
|
unsigned int family = GLRO(dl_x86_cpu_features).family;
|
||||||
|
unsigned int model = GLRO(dl_x86_cpu_features).model;
|
||||||
|
|
||||||
/* There seems to be a bug in at least some Pentium Ds
|
int i = 0;
|
||||||
which sometimes fail to iterate all cache parameters.
|
|
||||||
Do not loop indefinitely here, stop in this case and
|
|
||||||
assume there is no such information. */
|
|
||||||
if ((eax & 0x1f) == 0)
|
|
||||||
goto intel_bug_no_cache_info;
|
|
||||||
}
|
|
||||||
while (((eax >> 5) & 0x7) != level);
|
|
||||||
|
|
||||||
/* Check if cache is inclusive of lower cache levels. */
|
/* Query until desired cache level is enumerated. */
|
||||||
inclusive_cache = (edx & 0x2) != 0;
|
do
|
||||||
|
|
||||||
threads = (eax >> 14) & 0x3ff;
|
|
||||||
|
|
||||||
/* If max_cpuid >= 11, THREADS is the maximum number of
|
|
||||||
addressable IDs for logical processors sharing the
|
|
||||||
cache, instead of the maximum number of threads
|
|
||||||
sharing the cache. */
|
|
||||||
if (threads && max_cpuid >= 11)
|
|
||||||
{
|
|
||||||
/* Find the number of logical processors shipped in
|
|
||||||
one core and apply count mask. */
|
|
||||||
i = 0;
|
|
||||||
while (1)
|
|
||||||
{
|
{
|
||||||
__cpuid_count (11, i++, eax, ebx, ecx, edx);
|
__cpuid_count (4, i++, eax, ebx, ecx, edx);
|
||||||
|
|
||||||
int shipped = ebx & 0xff;
|
/* There seems to be a bug in at least some Pentium Ds
|
||||||
int type = ecx & 0xff0;
|
which sometimes fail to iterate all cache parameters.
|
||||||
if (shipped == 0 || type == 0)
|
Do not loop indefinitely here, stop in this case and
|
||||||
break;
|
assume there is no such information. */
|
||||||
else if (type == 0x200)
|
if ((eax & 0x1f) == 0)
|
||||||
|
goto intel_bug_no_cache_info;
|
||||||
|
}
|
||||||
|
while (((eax >> 5) & 0x7) != level);
|
||||||
|
|
||||||
|
/* Check if cache is inclusive of lower cache levels. */
|
||||||
|
inclusive_cache = (edx & 0x2) != 0;
|
||||||
|
|
||||||
|
threads = (eax >> 14) & 0x3ff;
|
||||||
|
|
||||||
|
/* If max_cpuid >= 11, THREADS is the maximum number of
|
||||||
|
addressable IDs for logical processors sharing the
|
||||||
|
cache, instead of the maximum number of threads
|
||||||
|
sharing the cache. */
|
||||||
|
if (threads && max_cpuid >= 11)
|
||||||
|
{
|
||||||
|
/* Find the number of logical processors shipped in
|
||||||
|
one core and apply count mask. */
|
||||||
|
i = 0;
|
||||||
|
while (1)
|
||||||
{
|
{
|
||||||
int count_mask;
|
__cpuid_count (11, i++, eax, ebx, ecx, edx);
|
||||||
|
|
||||||
/* Compute count mask. */
|
int shipped = ebx & 0xff;
|
||||||
asm ("bsr %1, %0"
|
int type = ecx & 0xff0;
|
||||||
: "=r" (count_mask) : "g" (threads));
|
if (shipped == 0 || type == 0)
|
||||||
count_mask = ~(-1 << (count_mask + 1));
|
break;
|
||||||
threads = (shipped - 1) & count_mask;
|
else if (type == 0x200)
|
||||||
|
{
|
||||||
|
int count_mask;
|
||||||
|
|
||||||
|
/* Compute count mask. */
|
||||||
|
asm ("bsr %1, %0"
|
||||||
|
: "=r" (count_mask) : "g" (threads));
|
||||||
|
count_mask = ~(-1 << (count_mask + 1));
|
||||||
|
threads = (shipped - 1) & count_mask;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
threads += 1;
|
||||||
|
if (threads > 2 && level == 2 && family == 6)
|
||||||
|
{
|
||||||
|
switch (model)
|
||||||
|
{
|
||||||
|
case 0x57:
|
||||||
|
/* Knights Landing has L2 cache shared by 2 cores. */
|
||||||
|
case 0x37:
|
||||||
|
case 0x4a:
|
||||||
|
case 0x4d:
|
||||||
|
case 0x5a:
|
||||||
|
case 0x5d:
|
||||||
|
/* Silvermont has L2 cache shared by 2 cores. */
|
||||||
|
threads = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
threads += 1;
|
else
|
||||||
if (threads > 2 && level == 2 && family == 6)
|
|
||||||
{
|
{
|
||||||
switch (model)
|
intel_bug_no_cache_info:
|
||||||
{
|
/* Assume that all logical threads share the highest cache
|
||||||
case 0x57:
|
level. */
|
||||||
/* Knights Landing has L2 cache shared by 2 cores. */
|
|
||||||
case 0x37:
|
threads
|
||||||
case 0x4a:
|
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
|
||||||
case 0x4d:
|
>> 16) & 0xff);
|
||||||
case 0x5a:
|
|
||||||
case 0x5d:
|
|
||||||
/* Silvermont has L2 cache shared by 2 cores. */
|
|
||||||
threads = 2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
intel_bug_no_cache_info:
|
|
||||||
/* Assume that all logical threads share the highest cache level. */
|
|
||||||
|
|
||||||
threads
|
/* Cap usage of highest cache level to the number of supported
|
||||||
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
|
threads. */
|
||||||
>> 16) & 0xff);
|
if (shared > 0 && threads > 0)
|
||||||
|
shared /= threads;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Cap usage of highest cache level to the number of supported
|
|
||||||
threads. */
|
|
||||||
if (shared > 0 && threads > 0)
|
|
||||||
shared /= threads;
|
|
||||||
|
|
||||||
/* Account for non-inclusive L2 and L3 caches. */
|
/* Account for non-inclusive L2 and L3 caches. */
|
||||||
if (level == 3 && !inclusive_cache)
|
if (level == 3 && !inclusive_cache)
|
||||||
shared += core;
|
shared += core;
|
||||||
|
|
|
@ -51,6 +51,7 @@
|
||||||
#define bit_cpu_POPCOUNT (1 << 23)
|
#define bit_cpu_POPCOUNT (1 << 23)
|
||||||
#define bit_cpu_FMA (1 << 12)
|
#define bit_cpu_FMA (1 << 12)
|
||||||
#define bit_cpu_FMA4 (1 << 16)
|
#define bit_cpu_FMA4 (1 << 16)
|
||||||
|
#define bit_cpu_HTT (1 << 28)
|
||||||
|
|
||||||
/* COMMON_CPUID_INDEX_7. */
|
/* COMMON_CPUID_INDEX_7. */
|
||||||
#define bit_cpu_ERMS (1 << 9)
|
#define bit_cpu_ERMS (1 << 9)
|
||||||
|
@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||||||
# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
|
# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
|
||||||
# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
|
# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
|
||||||
# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
|
# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
|
||||||
|
# define index_cpu_HTT COMMON_CPUID_INDEX_1
|
||||||
|
|
||||||
# define reg_CX8 edx
|
# define reg_CX8 edx
|
||||||
# define reg_CMOV edx
|
# define reg_CMOV edx
|
||||||
|
@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||||||
# define reg_FMA4 ecx
|
# define reg_FMA4 ecx
|
||||||
# define reg_POPCOUNT ecx
|
# define reg_POPCOUNT ecx
|
||||||
# define reg_OSXSAVE ecx
|
# define reg_OSXSAVE ecx
|
||||||
|
# define reg_HTT edx
|
||||||
|
|
||||||
# define index_arch_Fast_Rep_String FEATURE_INDEX_1
|
# define index_arch_Fast_Rep_String FEATURE_INDEX_1
|
||||||
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1
|
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1
|
||||||
|
|
Loading…
Reference in New Issue