invoke.texi (l1-cache-size): Update documentation.
* doc/invoke.texi (l1-cache-size): Update documentation. (l2-cache-size): Document. * params.h (L2_CACHE_SIZE): New macro. * tree-ssa-loop-prefetch.c (L1_CACHE_SIZE_BYTES): Reflect that L1_CACHE_SIZE is in kB now. (L2_CACHE_SIZE_BYTES): New macro. (tree_ssa_prefetch_arrays): Show size in kB. * config/i386/i386.h (struct processor_costs): Add l1_cache_size and l2_cache_size fields. * config/i386/driver-i386.c (describe_cache): Detect cache size in kB. * config/i386/i386.c (size_cost, i386_cost, i486_cost, pentium_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, pentium4_cost, nocona_cost, core2_cost, generic64_cost, generic32_cost): Add l1_cache_size and l2_cache_size. (override_options): Set l1-cache-size and l2-cache-size to default values if not specified otherwise. * params.def (PARAM_L1_CACHE_SIZE): Change to set in kB. (PARAM_L2_CACHE_SIZE): New. From-SVN: r127117
This commit is contained in:
parent
0ea339ea4d
commit
46cb04410d
@ -1,3 +1,25 @@
|
||||
2007-08-01 Zdenek Dvorak <ook@ucw.cz>
|
||||
|
||||
* doc/invoke.texi (l1-cache-size): Update documentation.
|
||||
(l2-cache-size): Document.
|
||||
* params.h (L2_CACHE_SIZE): New macro.
|
||||
* tree-ssa-loop-prefetch.c (L1_CACHE_SIZE_BYTES): Reflect
|
||||
that L1_CACHE_SIZE is in kB now.
|
||||
(L2_CACHE_SIZE_BYTES): New macro.
|
||||
(tree_ssa_prefetch_arrays): Show size in kB.
|
||||
* config/i386/i386.h (struct processor_costs): Add l1_cache_size
|
||||
and l2_cache_size fields.
|
||||
* config/i386/driver-i386.c (describe_cache): Detect cache size
|
||||
in kB.
|
||||
* config/i386/i386.c (size_cost, i386_cost, i486_cost, pentium_cost,
|
||||
pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost,
|
||||
amdfam10_cost, pentium4_cost, nocona_cost, core2_cost,
|
||||
generic64_cost, generic32_cost): Add l1_cache_size and l2_cache_size.
|
||||
(override_options): Set l1-cache-size and l2-cache-size to default
|
||||
values if not specified otherwise.
|
||||
* params.def (PARAM_L1_CACHE_SIZE): Change to set in kB.
|
||||
(PARAM_L2_CACHE_SIZE): New.
|
||||
|
||||
2007-08-01 Nigel Stephens <nigel@mips.com>
|
||||
David Ung <davidu@mips.com>
|
||||
Thiemo Seufer <ths@mips.com>
|
||||
|
@ -56,14 +56,11 @@ describe_cache (unsigned l1_sizekb, unsigned l1_line,
|
||||
unsigned l1_assoc ATTRIBUTE_UNUSED)
|
||||
{
|
||||
char size[1000], line[1000];
|
||||
unsigned size_in_lines;
|
||||
|
||||
/* At the moment, gcc middle-end does not use the information about the
|
||||
associativity of the cache. */
|
||||
|
||||
size_in_lines = (l1_sizekb * 1024) / l1_line;
|
||||
|
||||
sprintf (size, "--param l1-cache-size=%u", size_in_lines);
|
||||
sprintf (size, "--param l1-cache-size=%u", l1_sizekb);
|
||||
sprintf (line, "--param l1-cache-line-size=%u", l1_line);
|
||||
|
||||
return concat (size, " ", line, " ", NULL);
|
||||
|
@ -113,6 +113,8 @@ struct processor_costs size_cost = { /* costs for tuning for size */
|
||||
{3, 3, 3}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
0, /* size of l1 cache */
|
||||
0, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
0, /* number of parallel prefetches */
|
||||
2, /* Branch cost */
|
||||
@ -171,6 +173,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
0, /* size of l1 cache */
|
||||
0, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
0, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
@ -228,6 +232,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
4, /* size of l1 cache. 486 has 8kB cache
|
||||
shared for code and data, so 4kB is
|
||||
not really precise. */
|
||||
4, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
0, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
@ -285,6 +293,8 @@ struct processor_costs pentium_cost = {
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
8, /* size of l1 cache. */
|
||||
8, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
0, /* number of parallel prefetches */
|
||||
2, /* Branch cost */
|
||||
@ -342,6 +352,8 @@ struct processor_costs pentiumpro_cost = {
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
8, /* size of l1 cache. */
|
||||
256, /* size of l2 cache */
|
||||
32, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
2, /* Branch cost */
|
||||
@ -407,6 +419,8 @@ struct processor_costs geode_cost = {
|
||||
{1, 1, 1}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
1, /* MMX or SSE register to integer */
|
||||
64, /* size of l1 cache. */
|
||||
128, /* size of l2 cache. */
|
||||
32, /* size of prefetch block */
|
||||
1, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
@ -464,6 +478,11 @@ struct processor_costs k6_cost = {
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
6, /* MMX or SSE register to integer */
|
||||
32, /* size of l1 cache. */
|
||||
32, /* size of l2 cache. Some models
|
||||
have integrated l2 cache, but
|
||||
optimizing for k6 is not important
|
||||
enough to worry about that. */
|
||||
32, /* size of prefetch block */
|
||||
1, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
@ -521,6 +540,8 @@ struct processor_costs athlon_cost = {
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
64, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
5, /* Branch cost */
|
||||
@ -581,6 +602,8 @@ struct processor_costs k8_cost = {
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
64, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
/* New AMD processors never drop prefetches; if they cannot be performed
|
||||
immediately, they are queued. We set number of simultaneous prefetches
|
||||
@ -654,6 +677,8 @@ struct processor_costs amdfam10_cost = {
|
||||
1/1 1/1
|
||||
MOVD reg32, xmmreg Double FADD 3
|
||||
1/1 1/1 */
|
||||
64, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
/* New AMD processors never drop prefetches; if they cannot be performed
|
||||
immediately, they are queued. We set number of simultaneous prefetches
|
||||
@ -721,6 +746,8 @@ struct processor_costs pentium4_cost = {
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
10, /* MMX or SSE register to integer */
|
||||
8, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
2, /* Branch cost */
|
||||
@ -779,6 +806,8 @@ struct processor_costs nocona_cost = {
|
||||
{12, 12, 12}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
8, /* MMX or SSE register to integer */
|
||||
8, /* size of l1 cache. */
|
||||
1024, /* size of l2 cache. */
|
||||
128, /* size of prefetch block */
|
||||
8, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
@ -838,6 +867,8 @@ struct processor_costs core2_cost = {
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
32, /* size of l1 cache. */
|
||||
2048, /* size of l2 cache. */
|
||||
128, /* size of prefetch block */
|
||||
8, /* number of parallel prefetches */
|
||||
3, /* Branch cost */
|
||||
@ -903,6 +934,8 @@ struct processor_costs generic64_cost = {
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
32, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
/* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
|
||||
@ -963,6 +996,8 @@ struct processor_costs generic32_cost = {
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
32, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
3, /* Branch cost */
|
||||
@ -2419,6 +2454,10 @@ override_options (void)
|
||||
ix86_cost->simultaneous_prefetches);
|
||||
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
|
||||
set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
|
||||
if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
|
||||
set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
|
||||
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
|
||||
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
|
||||
}
|
||||
|
||||
/* Return true if this goes in large data/bss. */
|
||||
|
@ -124,6 +124,8 @@ struct processor_costs {
|
||||
in SImode, DImode and TImode*/
|
||||
const int mmxsse_to_integer; /* cost of moving mmxsse register to
|
||||
integer and vice versa. */
|
||||
const int l1_cache_size; /* size of l1 cache, in kilobytes. */
|
||||
const int l2_cache_size; /* size of l2 cache, in kilobytes. */
|
||||
const int prefetch_block; /* bytes moved to cache for prefetch. */
|
||||
const int simultaneous_prefetches; /* number of parallel prefetch
|
||||
operations. */
|
||||
|
@ -6958,7 +6958,10 @@ Maximum number of prefetches that can run at the same time.
|
||||
The size of cache line in L1 cache, in bytes.
|
||||
|
||||
@item l1-cache-size
|
||||
The number of cache lines in L1 cache.
|
||||
The size of L1 cache, in kilobytes.
|
||||
|
||||
@item l2-cache-size
|
||||
The size of L2 cache, in kilobytes.
|
||||
|
||||
@item use-canonical-types
|
||||
Whether the compiler should use the ``canonical'' type system. By
|
||||
|
@ -656,12 +656,12 @@ DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES,
|
||||
"The number of prefetches that can run at the same time",
|
||||
3, 0, 0)
|
||||
|
||||
/* The size of L1 cache in number of cache lines. */
|
||||
/* The size of L1 cache in kB. */
|
||||
|
||||
DEFPARAM (PARAM_L1_CACHE_SIZE,
|
||||
"l1-cache-size",
|
||||
"The size of L1 cache",
|
||||
1024, 0, 0)
|
||||
64, 0, 0)
|
||||
|
||||
/* The size of L1 cache line in bytes. */
|
||||
|
||||
@ -670,6 +670,13 @@ DEFPARAM (PARAM_L1_CACHE_LINE_SIZE,
|
||||
"The size of L1 cache line",
|
||||
32, 0, 0)
|
||||
|
||||
/* The size of L2 cache in kB. */
|
||||
|
||||
DEFPARAM (PARAM_L2_CACHE_SIZE,
|
||||
"l2-cache-size",
|
||||
"The size of L2 cache",
|
||||
512, 0, 0)
|
||||
|
||||
/* Whether we should use canonical types rather than deep "structural"
|
||||
type checking. Setting this value to 1 (the default) improves
|
||||
compilation performance in the C++ and Objective-C++ front end;
|
||||
|
@ -165,6 +165,8 @@ typedef enum compiler_param
|
||||
PARAM_VALUE (PARAM_L1_CACHE_SIZE)
|
||||
#define L1_CACHE_LINE_SIZE \
|
||||
PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
|
||||
#define L2_CACHE_SIZE \
|
||||
PARAM_VALUE (PARAM_L2_CACHE_SIZE)
|
||||
#define USE_CANONICAL_TYPES \
|
||||
PARAM_VALUE (PARAM_USE_CANONICAL_TYPES)
|
||||
#endif /* ! GCC_PARAMS_H */
|
||||
|
@ -166,9 +166,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define HAVE_prefetch 0
|
||||
#endif
|
||||
|
||||
#define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * L1_CACHE_LINE_SIZE))
|
||||
/* TODO: Add parameter to specify L2 cache size. */
|
||||
#define L2_CACHE_SIZE_BYTES (8 * L1_CACHE_SIZE_BYTES)
|
||||
#define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024))
|
||||
#define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024))
|
||||
|
||||
/* We consider a memory access nontemporal if it is not reused sooner than
|
||||
after L2_CACHE_SIZE_BYTES of memory are accessed. However, we ignore
|
||||
@ -1549,10 +1548,10 @@ tree_ssa_prefetch_arrays (void)
|
||||
SIMULTANEOUS_PREFETCHES);
|
||||
fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY);
|
||||
fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK);
|
||||
fprintf (dump_file, " L1 cache size: %d lines, %d bytes\n",
|
||||
L1_CACHE_SIZE, L1_CACHE_SIZE_BYTES);
|
||||
fprintf (dump_file, " L1 cache size: %d lines, %d kB\n",
|
||||
L1_CACHE_SIZE_BYTES / L1_CACHE_LINE_SIZE, L1_CACHE_SIZE);
|
||||
fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE);
|
||||
fprintf (dump_file, " L2 cache size: %d bytes\n", L2_CACHE_SIZE_BYTES);
|
||||
fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user