Check the HTT bit before counting logical threads

Skip counting logical threads for Intel processors if the HTT bit is 0,
which indicates there is only a single logical processor.

	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
	logical threads if the HTT bit is 0.
	* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
	(index_cpu_HTT): Likewise.
	(reg_HTT): Likewise.
This commit is contained in:
H.J. Lu 2016-05-19 09:09:00 -07:00
parent eb2c88c7c8
commit 7c08d791ee
3 changed files with 93 additions and 76 deletions

View File

@ -1,3 +1,11 @@
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
logical threads if the HTT bit is 0.
* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
(index_cpu_HTT): Likewise.
(reg_HTT): Likewise.
2016-05-19 H.J. Lu <hongjiu.lu@intel.com> 2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
[BZ #20115] [BZ #20115]

View File

@ -506,99 +506,105 @@ init_cacheinfo (void)
shared = core; shared = core;
} }
/* Figure out the number of logical threads that share the /* A value of 0 for the HTT bit indicates there is only a single
highest cache level. */ logical processor. */
if (max_cpuid >= 4) if (HAS_CPU_FEATURE (HTT))
{ {
unsigned int family = GLRO(dl_x86_cpu_features).family; /* Figure out the number of logical threads that share the
unsigned int model = GLRO(dl_x86_cpu_features).model; highest cache level. */
if (max_cpuid >= 4)
int i = 0;
/* Query until desired cache level is enumerated. */
do
{ {
__cpuid_count (4, i++, eax, ebx, ecx, edx); unsigned int family = GLRO(dl_x86_cpu_features).family;
unsigned int model = GLRO(dl_x86_cpu_features).model;
/* There seems to be a bug in at least some Pentium Ds int i = 0;
which sometimes fail to iterate all cache parameters.
Do not loop indefinitely here, stop in this case and
assume there is no such information. */
if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
}
while (((eax >> 5) & 0x7) != level);
/* Check if cache is inclusive of lower cache levels. */ /* Query until desired cache level is enumerated. */
inclusive_cache = (edx & 0x2) != 0; do
threads = (eax >> 14) & 0x3ff;
/* If max_cpuid >= 11, THREADS is the maximum number of
addressable IDs for logical processors sharing the
cache, instead of the maximum number of threads
sharing the cache. */
if (threads && max_cpuid >= 11)
{
/* Find the number of logical processors shipped in
one core and apply count mask. */
i = 0;
while (1)
{ {
__cpuid_count (11, i++, eax, ebx, ecx, edx); __cpuid_count (4, i++, eax, ebx, ecx, edx);
int shipped = ebx & 0xff; /* There seems to be a bug in at least some Pentium Ds
int type = ecx & 0xff0; which sometimes fail to iterate all cache parameters.
if (shipped == 0 || type == 0) Do not loop indefinitely here, stop in this case and
break; assume there is no such information. */
else if (type == 0x200) if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
}
while (((eax >> 5) & 0x7) != level);
/* Check if cache is inclusive of lower cache levels. */
inclusive_cache = (edx & 0x2) != 0;
threads = (eax >> 14) & 0x3ff;
/* If max_cpuid >= 11, THREADS is the maximum number of
addressable IDs for logical processors sharing the
cache, instead of the maximum number of threads
sharing the cache. */
if (threads && max_cpuid >= 11)
{
/* Find the number of logical processors shipped in
one core and apply count mask. */
i = 0;
while (1)
{ {
int count_mask; __cpuid_count (11, i++, eax, ebx, ecx, edx);
/* Compute count mask. */ int shipped = ebx & 0xff;
asm ("bsr %1, %0" int type = ecx & 0xff0;
: "=r" (count_mask) : "g" (threads)); if (shipped == 0 || type == 0)
count_mask = ~(-1 << (count_mask + 1)); break;
threads = (shipped - 1) & count_mask; else if (type == 0x200)
{
int count_mask;
/* Compute count mask. */
asm ("bsr %1, %0"
: "=r" (count_mask) : "g" (threads));
count_mask = ~(-1 << (count_mask + 1));
threads = (shipped - 1) & count_mask;
break;
}
}
}
threads += 1;
if (threads > 2 && level == 2 && family == 6)
{
switch (model)
{
case 0x57:
/* Knights Landing has L2 cache shared by 2 cores. */
case 0x37:
case 0x4a:
case 0x4d:
case 0x5a:
case 0x5d:
/* Silvermont has L2 cache shared by 2 cores. */
threads = 2;
break;
default:
break; break;
} }
} }
} }
threads += 1; else
if (threads > 2 && level == 2 && family == 6)
{ {
switch (model) intel_bug_no_cache_info:
{ /* Assume that all logical threads share the highest cache
case 0x57: level. */
/* Knights Landing has L2 cache shared by 2 cores. */
case 0x37: threads
case 0x4a: = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
case 0x4d: >> 16) & 0xff);
case 0x5a:
case 0x5d:
/* Silvermont has L2 cache shared by 2 cores. */
threads = 2;
break;
default:
break;
}
} }
}
else
{
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
threads /* Cap usage of highest cache level to the number of supported
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx threads. */
>> 16) & 0xff); if (shared > 0 && threads > 0)
shared /= threads;
} }
/* Cap usage of highest cache level to the number of supported
threads. */
if (shared > 0 && threads > 0)
shared /= threads;
/* Account for non-inclusive L2 and L3 caches. */ /* Account for non-inclusive L2 and L3 caches. */
if (level == 3 && !inclusive_cache) if (level == 3 && !inclusive_cache)
shared += core; shared += core;

View File

@ -51,6 +51,7 @@
#define bit_cpu_POPCOUNT (1 << 23) #define bit_cpu_POPCOUNT (1 << 23)
#define bit_cpu_FMA (1 << 12) #define bit_cpu_FMA (1 << 12)
#define bit_cpu_FMA4 (1 << 16) #define bit_cpu_FMA4 (1 << 16)
#define bit_cpu_HTT (1 << 28)
/* COMMON_CPUID_INDEX_7. */ /* COMMON_CPUID_INDEX_7. */
#define bit_cpu_ERMS (1 << 9) #define bit_cpu_ERMS (1 << 9)
@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001 # define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1 # define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1 # define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
# define index_cpu_HTT COMMON_CPUID_INDEX_1
# define reg_CX8 edx # define reg_CX8 edx
# define reg_CMOV edx # define reg_CMOV edx
@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define reg_FMA4 ecx # define reg_FMA4 ecx
# define reg_POPCOUNT ecx # define reg_POPCOUNT ecx
# define reg_OSXSAVE ecx # define reg_OSXSAVE ecx
# define reg_HTT edx
# define index_arch_Fast_Rep_String FEATURE_INDEX_1 # define index_arch_Fast_Rep_String FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1 # define index_arch_Fast_Copy_Backward FEATURE_INDEX_1