Properly count number of logical processors on Intel CPUs.
The meaning of bits 25-14 of EAX returned by cpuid with EAX = 4 has changed from "the maximum number of threads sharing the cache" to "the maximum number of addressable IDs for logical processors sharing the cache" on processors whose cpuid supports EAX = 11. We need to use the results from both EAX = 4 and EAX = 11 to get the number of threads sharing the cache. On Core i7, bits 25-14 of EAX hold 15 although the number of logical processors is 8. Here is a white paper on this: http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/ This patch correctly counts the number of logical processors on Intel CPUs whose cpuid supports EAX = 11. Tested on Dunnington, Core i7 and Nehalem EX/EP. It also fixes the Pentium D workaround, since by that point EBX may no longer hold the value returned from cpuid with EAX = 1.
This commit is contained in:
parent
77c84aeb81
commit
a546baa9cd
|
@ -1,3 +1,8 @@
|
||||||
|
2009-08-05 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Properly use
|
||||||
|
EBX from EAX = 1. Handle EAX = 11.
|
||||||
|
|
||||||
2009-08-07 Andreas Schwab <schwab@redhat.com>
|
2009-08-07 Andreas Schwab <schwab@redhat.com>
|
||||||
|
|
||||||
* Makefile (TAGS): Use separate sed -e expressions to avoid \
|
* Makefile (TAGS): Use separate sed -e expressions to avoid \
|
||||||
|
|
|
@ -516,13 +516,15 @@ init_cacheinfo (void)
|
||||||
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
|
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int ebx_1;
|
||||||
|
|
||||||
#ifdef USE_MULTIARCH
|
#ifdef USE_MULTIARCH
|
||||||
eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
|
eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
|
||||||
ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
|
ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
|
||||||
ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
|
ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
|
||||||
edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
|
edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
|
||||||
#else
|
#else
|
||||||
__cpuid (1, eax, ebx, ecx, edx);
|
__cpuid (1, eax, ebx_1, ecx, edx);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
|
#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
|
||||||
|
@ -554,14 +556,46 @@ init_cacheinfo (void)
|
||||||
}
|
}
|
||||||
while (((eax >> 5) & 0x7) != level);
|
while (((eax >> 5) & 0x7) != level);
|
||||||
|
|
||||||
threads = ((eax >> 14) & 0x3ff) + 1;
|
threads = (eax >> 14) & 0x3ff;
|
||||||
|
|
||||||
|
/* If max_cpuid >= 11, THREADS is the maximum number of
|
||||||
|
addressable IDs for logical processors sharing the
|
||||||
|
cache, instead of the maximum number of threads
|
||||||
|
sharing the cache. */
|
||||||
|
if (threads && max_cpuid >= 11)
|
||||||
|
{
|
||||||
|
/* Find the number of logical processors shipped in
|
||||||
|
one core and apply count mask. */
|
||||||
|
i = 0;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
__cpuid_count (11, i++, eax, ebx, ecx, edx);
|
||||||
|
|
||||||
|
int shipped = ebx & 0xff;
|
||||||
|
int type = ecx & 0xff0;
|
||||||
|
if (shipped == 0 || type == 0)
|
||||||
|
break;
|
||||||
|
else if (type == 0x200)
|
||||||
|
{
|
||||||
|
int count_mask;
|
||||||
|
|
||||||
|
/* Compute count mask. */
|
||||||
|
asm ("bsr %1, %0"
|
||||||
|
: "=r" (count_mask) : "g" (threads));
|
||||||
|
count_mask = ~(-1 << (count_mask + 1));
|
||||||
|
threads = (shipped - 1) & count_mask;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
threads += 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intel_bug_no_cache_info:
|
intel_bug_no_cache_info:
|
||||||
/* Assume that all logical threads share the highest cache level. */
|
/* Assume that all logical threads share the highest cache level. */
|
||||||
|
|
||||||
threads = (ebx >> 16) & 0xff;
|
threads = (ebx_1 >> 16) & 0xff;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Cap usage of highest cache level to the number of supported
|
/* Cap usage of highest cache level to the number of supported
|
||||||
|
|
Loading…
Reference in New Issue