Check the HTT bit before counting logical threads

Skip counting logical threads for Intel processors if the HTT bit is 0,
which indicates that there is only a single logical processor.

	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
	logical threads if the HTT bit is 0.
	* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
	(index_cpu_HTT): Likewise.
	(reg_HTT): Likewise.
This commit is contained in:
H.J. Lu 2016-05-19 09:09:00 -07:00
parent eb2c88c7c8
commit 7c08d791ee
3 changed files with 93 additions and 76 deletions

View File

@ -1,3 +1,11 @@
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
logical threads if the HTT bit is 0.
* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
(index_cpu_HTT): Likewise.
(reg_HTT): Likewise.
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
[BZ #20115]

View File

@ -506,99 +506,105 @@ init_cacheinfo (void)
shared = core;
}
/* Figure out the number of logical threads that share the
highest cache level. */
if (max_cpuid >= 4)
/* A value of 0 for the HTT bit indicates there is only a single
logical processor. */
if (HAS_CPU_FEATURE (HTT))
{
unsigned int family = GLRO(dl_x86_cpu_features).family;
unsigned int model = GLRO(dl_x86_cpu_features).model;
int i = 0;
/* Query until desired cache level is enumerated. */
do
/* Figure out the number of logical threads that share the
highest cache level. */
if (max_cpuid >= 4)
{
__cpuid_count (4, i++, eax, ebx, ecx, edx);
unsigned int family = GLRO(dl_x86_cpu_features).family;
unsigned int model = GLRO(dl_x86_cpu_features).model;
/* There seems to be a bug in at least some Pentium Ds
which sometimes fail to iterate all cache parameters.
Do not loop indefinitely here, stop in this case and
assume there is no such information. */
if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
}
while (((eax >> 5) & 0x7) != level);
int i = 0;
/* Check if cache is inclusive of lower cache levels. */
inclusive_cache = (edx & 0x2) != 0;
threads = (eax >> 14) & 0x3ff;
/* If max_cpuid >= 11, THREADS is the maximum number of
addressable IDs for logical processors sharing the
cache, instead of the maximum number of threads
sharing the cache. */
if (threads && max_cpuid >= 11)
{
/* Find the number of logical processors shipped in
one core and apply count mask. */
i = 0;
while (1)
/* Query until desired cache level is enumerated. */
do
{
__cpuid_count (11, i++, eax, ebx, ecx, edx);
__cpuid_count (4, i++, eax, ebx, ecx, edx);
int shipped = ebx & 0xff;
int type = ecx & 0xff0;
if (shipped == 0 || type == 0)
break;
else if (type == 0x200)
/* There seems to be a bug in at least some Pentium Ds
which sometimes fail to iterate all cache parameters.
Do not loop indefinitely here, stop in this case and
assume there is no such information. */
if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
}
while (((eax >> 5) & 0x7) != level);
/* Check if cache is inclusive of lower cache levels. */
inclusive_cache = (edx & 0x2) != 0;
threads = (eax >> 14) & 0x3ff;
/* If max_cpuid >= 11, THREADS is the maximum number of
addressable IDs for logical processors sharing the
cache, instead of the maximum number of threads
sharing the cache. */
if (threads && max_cpuid >= 11)
{
/* Find the number of logical processors shipped in
one core and apply count mask. */
i = 0;
while (1)
{
int count_mask;
__cpuid_count (11, i++, eax, ebx, ecx, edx);
/* Compute count mask. */
asm ("bsr %1, %0"
: "=r" (count_mask) : "g" (threads));
count_mask = ~(-1 << (count_mask + 1));
threads = (shipped - 1) & count_mask;
int shipped = ebx & 0xff;
int type = ecx & 0xff0;
if (shipped == 0 || type == 0)
break;
else if (type == 0x200)
{
int count_mask;
/* Compute count mask. */
asm ("bsr %1, %0"
: "=r" (count_mask) : "g" (threads));
count_mask = ~(-1 << (count_mask + 1));
threads = (shipped - 1) & count_mask;
break;
}
}
}
threads += 1;
if (threads > 2 && level == 2 && family == 6)
{
switch (model)
{
case 0x57:
/* Knights Landing has L2 cache shared by 2 cores. */
case 0x37:
case 0x4a:
case 0x4d:
case 0x5a:
case 0x5d:
/* Silvermont has L2 cache shared by 2 cores. */
threads = 2;
break;
default:
break;
}
}
}
threads += 1;
if (threads > 2 && level == 2 && family == 6)
else
{
switch (model)
{
case 0x57:
/* Knights Landing has L2 cache shared by 2 cores. */
case 0x37:
case 0x4a:
case 0x4d:
case 0x5a:
case 0x5d:
/* Silvermont has L2 cache shared by 2 cores. */
threads = 2;
break;
default:
break;
}
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache
level. */
threads
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
>> 16) & 0xff);
}
}
else
{
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
threads
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
>> 16) & 0xff);
/* Cap usage of highest cache level to the number of supported
threads. */
if (shared > 0 && threads > 0)
shared /= threads;
}
/* Cap usage of highest cache level to the number of supported
threads. */
if (shared > 0 && threads > 0)
shared /= threads;
/* Account for non-inclusive L2 and L3 caches. */
if (level == 3 && !inclusive_cache)
shared += core;

View File

@ -51,6 +51,7 @@
#define bit_cpu_POPCOUNT (1 << 23)
#define bit_cpu_FMA (1 << 12)
#define bit_cpu_FMA4 (1 << 16)
#define bit_cpu_HTT (1 << 28)
/* COMMON_CPUID_INDEX_7. */
#define bit_cpu_ERMS (1 << 9)
@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
# define index_cpu_HTT COMMON_CPUID_INDEX_1
# define reg_CX8 edx
# define reg_CMOV edx
@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define reg_FMA4 ecx
# define reg_POPCOUNT ecx
# define reg_OSXSAVE ecx
# define reg_HTT edx
# define index_arch_Fast_Rep_String FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1