32bit memset-sse2.S fails with uneven cache size
The 32-bit memset-sse2.S assumes the cache size is a multiple of 128 bytes. If that isn't true, memset-sse2.S will fail. For example, a processor can have a 24576 KB L3 cache and 20 cores. That is 2516582 bytes per core. Half of it is 1258291, which isn't helpful for vector instructions. This patch rounds cache sizes down to a multiple of 256 bytes and adds "raw" (unrounded) cache sizes.
This commit is contained in:
parent
0e516e0e14
commit
c0dde15b5d
19
ChangeLog
19
ChangeLog
@ -1,3 +1,22 @@
|
||||
2010-11-03 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
[BZ #12191]
|
||||
* sysdeps/i386/i686/cacheinfo.c (__x86_64_raw_data_cache_size): New.
|
||||
(__x86_64_raw_data_cache_size_half): Likewise.
|
||||
(__x86_64_raw_shared_cache_size): Likewise.
|
||||
(__x86_64_raw_shared_cache_size_half): Likewise.
|
||||
|
||||
* sysdeps/x86_64/cacheinfo.c (__x86_64_raw_data_cache_size): New.
|
||||
(__x86_64_raw_data_cache_size_half): Likewise.
|
||||
(__x86_64_raw_shared_cache_size): Likewise.
|
||||
(__x86_64_raw_shared_cache_size_half): Likewise.
|
||||
(init_cacheinfo): Set __x86_64_raw_data_cache_size,
|
||||
__x86_64_raw_data_cache_size_half, __x86_64_raw_shared_cache_size
|
||||
and __x86_64_raw_shared_cache_size_half. Round
|
||||
__x86_64_data_cache_size_half, __x86_64_data_cache_size,
|
||||
__x86_64_shared_cache_size_half and __x86_64_shared_cache_size
|
||||
to multiple of 256 bytes.
|
||||
|
||||
2010-11-03 Ulrich Drepper <drepper@gmail.com>
|
||||
|
||||
[BZ #12167]
|
||||
|
4
NEWS
4
NEWS
@ -1,4 +1,4 @@
|
||||
GNU C Library NEWS -- history of user-visible changes. 2010-11-2
|
||||
GNU C Library NEWS -- history of user-visible changes. 2010-11-5
|
||||
Copyright (C) 1992-2009, 2010 Free Software Foundation, Inc.
|
||||
See the end for copying conditions.
|
||||
|
||||
@ -11,7 +11,7 @@ Version 2.13
|
||||
|
||||
3268, 7066, 10851, 11611, 11640, 11701, 11840, 11856, 11883, 11903, 11904,
|
||||
11968, 11979, 12005, 12037, 12067, 12077, 12078, 12092, 12093, 12107, 12108,
|
||||
12113, 12140, 12159, 12167
|
||||
12113, 12140, 12159, 12167, 12191
|
||||
|
||||
* New Linux interfaces: prlimit, prlimit64, fanotify_init, fanotify_mark
|
||||
|
||||
|
@ -1,7 +1,11 @@
|
||||
#define __x86_64_data_cache_size __x86_data_cache_size
|
||||
#define __x86_64_raw_data_cache_size __x86_raw_data_cache_size
|
||||
#define __x86_64_data_cache_size_half __x86_data_cache_size_half
|
||||
#define __x86_64_raw_data_cache_size_half __x86_raw_data_cache_size_half
|
||||
#define __x86_64_shared_cache_size __x86_shared_cache_size
|
||||
#define __x86_64_raw_shared_cache_size __x86_raw_shared_cache_size
|
||||
#define __x86_64_shared_cache_size_half __x86_shared_cache_size_half
|
||||
#define __x86_64_raw_shared_cache_size_half __x86_raw_shared_cache_size_half
|
||||
|
||||
#define DISABLE_PREFETCHW
|
||||
#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
|
||||
|
@ -455,13 +455,21 @@ __cache_sysconf (int name)
|
||||
|
||||
|
||||
/* Data cache size for use in memory and string routines, typically
|
||||
L1 size. */
|
||||
L1 size, rounded down to a multiple of 256 bytes. */
|
||||
long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
|
||||
long int __x86_64_data_cache_size attribute_hidden = 32 * 1024;
|
||||
/* Similar to __x86_64_data_cache_size_half, but not rounded. */
|
||||
long int __x86_64_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
|
||||
/* Similar to __x86_64_data_cache_size, but not rounded. */
|
||||
long int __x86_64_raw_data_cache_size attribute_hidden = 32 * 1024;
|
||||
/* Shared cache size for use in memory and string routines, typically
|
||||
L2 or L3 size. */
|
||||
L2 or L3 size, rounded down to a multiple of 256 bytes. */
|
||||
long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
|
||||
long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
|
||||
/* Similar to __x86_64_shared_cache_size_half, but not rounded. */
|
||||
long int __x86_64_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
|
||||
/* Similar to __x86_64_shared_cache_size, but not rounded. */
|
||||
long int __x86_64_raw_shared_cache_size attribute_hidden = 1024 * 1024;
|
||||
|
||||
#ifndef DISABLE_PREFETCHW
|
||||
/* PREFETCHW support flag for use in memory and string routines. */
|
||||
@ -661,12 +669,20 @@ init_cacheinfo (void)
|
||||
|
||||
if (data > 0)
|
||||
{
|
||||
__x86_64_raw_data_cache_size_half = data / 2;
|
||||
__x86_64_raw_data_cache_size = data;
|
||||
/* Round data cache size down to a multiple of 256 bytes. */
|
||||
data = data & ~255L;
|
||||
__x86_64_data_cache_size_half = data / 2;
|
||||
__x86_64_data_cache_size = data;
|
||||
}
|
||||
|
||||
if (shared > 0)
|
||||
{
|
||||
__x86_64_raw_shared_cache_size_half = shared / 2;
|
||||
__x86_64_raw_shared_cache_size = shared;
|
||||
/* Round shared cache size down to a multiple of 256 bytes. */
|
||||
shared = shared & ~255L;
|
||||
__x86_64_shared_cache_size_half = shared / 2;
|
||||
__x86_64_shared_cache_size = shared;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user