diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9d610378334..178b736c9f7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2007-08-01 Zdenek Dvorak + + * doc/invoke.texi (l1-cache-size): Update documentation. + (l2-cache-size): Document. + * params.h (L2_CACHE_SIZE): New macro. + * tree-ssa-loop-prefetch.c (L1_CACHE_SIZE_BYTES): Reflect + that L1_CACHE_SIZE is in kB now. + (L2_CACHE_SIZE_BYTES): New macro. + (tree_ssa_prefetch_arrays): Show size in kB. + * config/i386/i386.h (struct processor_costs): Add l1_cache_size + and l2_cache_size fields. + * config/i386/driver-i386.c (describe_cache): Detect cache size + in kB. + * config/i386/i386.c (size_cost, i386_cost, i486_cost, pentium_cost, + pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, + amdfam10_cost, pentium4_cost, nocona_cost, core2_cost, + generic64_cost, generic32_cost): Add l1_cache_size and l2_cache_size. + (override_options): Set l1-cache-size and l2-cache-size to default + values if not specified otherwise. + * params.def (PARAM_L1_CACHE_SIZE): Change to set in kB. + (PARAM_L2_CACHE_SIZE): New. + 2007-08-01 Nigel Stephens David Ung Thiemo Seufer diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index a4155373f1a..1dbc783beaa 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -56,14 +56,11 @@ describe_cache (unsigned l1_sizekb, unsigned l1_line, unsigned l1_assoc ATTRIBUTE_UNUSED) { char size[1000], line[1000]; - unsigned size_in_lines; /* At the moment, gcc middle-end does not use the information about the associativity of the cache. 
*/ - size_in_lines = (l1_sizekb * 1024) / l1_line; - - sprintf (size, "--param l1-cache-size=%u", size_in_lines); + sprintf (size, "--param l1-cache-size=%u", l1_sizekb); sprintf (line, "--param l1-cache-line-size=%u", l1_line); return concat (size, " ", line, " ", NULL); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a9ee6d58564..cf8b5e7f226 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -113,6 +113,8 @@ struct processor_costs size_cost = { /* costs for tuning for size */ {3, 3, 3}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 0, /* size of l1 cache */ + 0, /* size of l2 cache */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ 2, /* Branch cost */ @@ -171,6 +173,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ {4, 8, 16}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 0, /* size of l1 cache */ + 0, /* size of l2 cache */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ 1, /* Branch cost */ @@ -228,6 +232,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */ {4, 8, 16}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 4, /* size of l1 cache. 486 has 8kB cache + shared for code and data, so 4kB is + not really precise. */ + 4, /* size of l2 cache */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ 1, /* Branch cost */ @@ -285,6 +293,8 @@ struct processor_costs pentium_cost = { {4, 8, 16}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. 
*/ + 8, /* size of l2 cache */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ 2, /* Branch cost */ @@ -342,6 +352,8 @@ struct processor_costs pentiumpro_cost = { {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 256, /* size of l2 cache */ 32, /* size of prefetch block */ 6, /* number of parallel prefetches */ 2, /* Branch cost */ @@ -407,6 +419,8 @@ struct processor_costs geode_cost = { {1, 1, 1}, /* cost of storing SSE registers in SImode, DImode and TImode */ 1, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 128, /* size of l2 cache. */ 32, /* size of prefetch block */ 1, /* number of parallel prefetches */ 1, /* Branch cost */ @@ -464,6 +478,11 @@ struct processor_costs k6_cost = { {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ 6, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 32, /* size of l2 cache. Some models + have integrated l2 cache, but + optimizing for k6 is not important + enough to worry about that. */ 32, /* size of prefetch block */ 1, /* number of parallel prefetches */ 1, /* Branch cost */ @@ -521,6 +540,8 @@ struct processor_costs athlon_cost = { {4, 4, 5}, /* cost of storing SSE registers in SImode, DImode and TImode */ 5, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 256, /* size of l2 cache. */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ 5, /* Branch cost */ @@ -581,6 +602,8 @@ struct processor_costs k8_cost = { {4, 4, 5}, /* cost of storing SSE registers in SImode, DImode and TImode */ 5, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 512, /* size of l2 cache. */ 64, /* size of prefetch block */ /* New AMD processors never drop prefetches; if they cannot be performed immediately, they are queued. 
We set number of simultaneous prefetches @@ -654,6 +677,8 @@ struct processor_costs amdfam10_cost = { 1/1 1/1 MOVD reg32, xmmreg Double FADD 3 1/1 1/1 */ + 64, /* size of l1 cache. */ + 512, /* size of l2 cache. */ 64, /* size of prefetch block */ /* New AMD processors never drop prefetches; if they cannot be performed immediately, they are queued. We set number of simultaneous prefetches @@ -721,6 +746,8 @@ struct processor_costs pentium4_cost = { {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ 10, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 256, /* size of l2 cache. */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ 2, /* Branch cost */ @@ -779,6 +806,8 @@ struct processor_costs nocona_cost = { {12, 12, 12}, /* cost of storing SSE registers in SImode, DImode and TImode */ 8, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 1024, /* size of l2 cache. */ 128, /* size of prefetch block */ 8, /* number of parallel prefetches */ 1, /* Branch cost */ @@ -838,6 +867,8 @@ struct processor_costs core2_cost = { {4, 4, 4}, /* cost of storing SSE registers in SImode, DImode and TImode */ 2, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ 128, /* size of prefetch block */ 8, /* number of parallel prefetches */ 3, /* Branch cost */ @@ -903,6 +934,8 @@ struct processor_costs generic64_cost = { {8, 8, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value @@ -963,6 +996,8 @@ struct processor_costs generic32_cost = { {8, 8, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. 
*/ + 256, /* size of l2 cache. */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ 3, /* Branch cost */ @@ -2419,6 +2454,10 @@ override_options (void) ix86_cost->simultaneous_prefetches); if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block); + if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE)) + set_param_value ("l1-cache-size", ix86_cost->l1_cache_size); + if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE)) + set_param_value ("l2-cache-size", ix86_cost->l2_cache_size); } /* Return true if this goes in large data/bss. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 31a434ac287..54601299c10 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -124,6 +124,8 @@ struct processor_costs { in SImode, DImode and TImode*/ const int mmxsse_to_integer; /* cost of moving mmxsse register to integer and vice versa. */ + const int l1_cache_size; /* size of l1 cache, in kilobytes. */ + const int l2_cache_size; /* size of l2 cache, in kilobytes. */ const int prefetch_block; /* bytes moved to cache for prefetch. */ const int simultaneous_prefetches; /* number of parallel prefetch operations. */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 49e29825ea0..d5974ee195f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6958,7 +6958,10 @@ Maximum number of prefetches that can run at the same time. The size of cache line in L1 cache, in bytes. @item l1-cache-size -The number of cache lines in L1 cache. +The size of L1 cache, in kilobytes. + +@item l2-cache-size +The size of L2 cache, in kilobytes. @item use-canonical-types Whether the compiler should use the ``canonical'' type system. 
By diff --git a/gcc/params.def b/gcc/params.def index e2817f8afee..32216764473 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -656,12 +656,12 @@ DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES, "The number of prefetches that can run at the same time", 3, 0, 0) -/* The size of L1 cache in number of cache lines. */ +/* The size of L1 cache in kB. */ DEFPARAM (PARAM_L1_CACHE_SIZE, "l1-cache-size", "The size of L1 cache", - 1024, 0, 0) + 64, 0, 0) /* The size of L1 cache line in bytes. */ @@ -670,6 +670,13 @@ DEFPARAM (PARAM_L1_CACHE_LINE_SIZE, "The size of L1 cache line", 32, 0, 0) +/* The size of L2 cache in kB. */ + +DEFPARAM (PARAM_L2_CACHE_SIZE, + "l2-cache-size", + "The size of L2 cache", + 512, 0, 0) + /* Whether we should use canonical types rather than deep "structural" type checking. Setting this value to 1 (the default) improves compilation performance in the C++ and Objective-C++ front end; diff --git a/gcc/params.h b/gcc/params.h index 6d06ccb1c33..71e6b21ef71 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -165,6 +165,8 @@ typedef enum compiler_param PARAM_VALUE (PARAM_L1_CACHE_SIZE) #define L1_CACHE_LINE_SIZE \ PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE) +#define L2_CACHE_SIZE \ + PARAM_VALUE (PARAM_L2_CACHE_SIZE) #define USE_CANONICAL_TYPES \ PARAM_VALUE (PARAM_USE_CANONICAL_TYPES) #endif /* ! GCC_PARAMS_H */ diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 0fada5537be..28cd3223c9c 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -166,9 +166,8 @@ along with GCC; see the file COPYING3. If not see #define HAVE_prefetch 0 #endif -#define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * L1_CACHE_LINE_SIZE)) -/* TODO: Add parameter to specify L2 cache size. 
*/ -#define L2_CACHE_SIZE_BYTES (8 * L1_CACHE_SIZE_BYTES) +#define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024)) +#define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024)) /* We consider a memory access nontemporal if it is not reused sooner than after L2_CACHE_SIZE_BYTES of memory are accessed. However, we ignore @@ -1549,10 +1548,10 @@ tree_ssa_prefetch_arrays (void) SIMULTANEOUS_PREFETCHES); fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY); fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK); - fprintf (dump_file, " L1 cache size: %d lines, %d bytes\n", - L1_CACHE_SIZE, L1_CACHE_SIZE_BYTES); + fprintf (dump_file, " L1 cache size: %d lines, %d kB\n", + L1_CACHE_SIZE_BYTES / L1_CACHE_LINE_SIZE, L1_CACHE_SIZE); fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE); - fprintf (dump_file, " L2 cache size: %d bytes\n", L2_CACHE_SIZE_BYTES); + fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE); fprintf (dump_file, "\n"); }