params.c (set_param_value): Initialize the "set" field.
* params.c (set_param_value): Initialize the "set" field. * params.h (struct param_info): Add "set" field. (PARAM_SET_P): New macro. (PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE, L1_CACHE_LINE_SIZE): New macros. * toplev.c (DEFPARAM): Initialize the "set" field. * tree-ssa-loop-prefetch.c (PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES): Removed. (PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE. (tree_ssa_prefetch_arrays): Dump the values of the parameters. * config/sparc/sparc.c: Include params.h. (sparc_override_options): Set SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters. * config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): Removed. * config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): Removed. * config/i386/i386.c: Include params.h. (k8_cost): Change default value for SIMULTANEOUS_PREFETCHES. (override_options): Set SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters. * config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed. (OPTIMIZATION_OPTIONS): Set SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters. * config/ia64/ia64.c (ia64_optimization_options): Set SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters. * config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK): Removed. * params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES, PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params. * doc/invoke.texi: Document new params. From-SVN: r118728
This commit is contained in:
parent
015e23f400
commit
47eb5b329b
|
@ -1,3 +1,37 @@
|
|||
2006-11-12 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* params.c (set_param_value): Initialize the "set" field.
|
||||
* params.h (struct param_info): Add "set" field.
|
||||
(PARAM_SET_P): New macro.
|
||||
(PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE,
|
||||
L1_CACHE_LINE_SIZE): New macros.
|
||||
* toplev.c (DEFPARAM): Initialize the "set" field.
|
||||
* tree-ssa-loop-prefetch.c (PREFETCH_LATENCY,
|
||||
SIMULTANEOUS_PREFETCHES): Removed.
|
||||
(PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE.
|
||||
(tree_ssa_prefetch_arrays): Dump the values of the parameters.
|
||||
* config/sparc/sparc.c: Include params.h.
|
||||
(sparc_override_options): Set SIMULTANEOUS_PREFETCHES and
|
||||
L1_CACHE_LINE_SIZE parameters.
|
||||
* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
|
||||
Removed.
|
||||
* config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
|
||||
Removed.
|
||||
* config/i386/i386.c: Include params.h.
|
||||
(k8_cost): Change default value for SIMULTANEOUS_PREFETCHES.
|
||||
(override_options): Set SIMULTANEOUS_PREFETCHES and
|
||||
L1_CACHE_LINE_SIZE parameters.
|
||||
* config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed.
|
||||
(OPTIMIZATION_OPTIONS): Set SIMULTANEOUS_PREFETCHES and
|
||||
L1_CACHE_LINE_SIZE parameters.
|
||||
* config/ia64/ia64.c (ia64_optimization_options): Set
|
||||
SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters.
|
||||
* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK):
|
||||
Removed.
|
||||
* params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES,
|
||||
PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params.
|
||||
* doc/invoke.texi: Document new params.
|
||||
|
||||
2006-11-12 Roger Sayle <roger@eyesopen.com>
|
||||
|
||||
PR tree-optimization/13827
|
||||
|
|
|
@ -50,6 +50,7 @@ Boston, MA 02110-1301, USA. */
|
|||
#include "tree-gimple.h"
|
||||
#include "dwarf2.h"
|
||||
#include "tm-constrs.h"
|
||||
#include "params.h"
|
||||
|
||||
#ifndef CHECK_STACK_LIMIT
|
||||
#define CHECK_STACK_LIMIT (-1)
|
||||
|
@ -536,7 +537,12 @@ struct processor_costs k8_cost = {
|
|||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
/* New AMD processors never drop prefetches; if they cannot be performed
|
||||
immediately, they are queued. We set number of simultaneous prefetches
|
||||
to a large constant to reflect this (it probably is not a good idea not
|
||||
to limit number of prefetches at all, as their execution also takes some
|
||||
time). */
|
||||
100, /* number of parallel prefetches */
|
||||
5, /* Branch cost */
|
||||
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
|
||||
|
@ -2063,6 +2069,12 @@ override_options (void)
|
|||
so it won't slow down the compilation and make x87 code slower. */
|
||||
if (!TARGET_SCHEDULE)
|
||||
flag_schedule_insns_after_reload = flag_schedule_insns = 0;
|
||||
|
||||
if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
|
||||
set_param_value ("simultaneous-prefetches",
|
||||
ix86_cost->simultaneous_prefetches);
|
||||
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
|
||||
set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
|
||||
}
|
||||
|
||||
/* switch to the appropriate section for output of DECL.
|
||||
|
|
|
@ -1739,12 +1739,6 @@ do { \
|
|||
/* Define this as 1 if `char' should by default be signed; else as 0. */
|
||||
#define DEFAULT_SIGNED_CHAR 1
|
||||
|
||||
/* Number of bytes moved into a data cache for a single prefetch operation. */
|
||||
#define PREFETCH_BLOCK ix86_cost->prefetch_block
|
||||
|
||||
/* Number of prefetch operations that can be done in parallel. */
|
||||
#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches
|
||||
|
||||
/* Max number of bytes we can move from memory to memory
|
||||
in one reasonably fast instruction. */
|
||||
#define MOVE_MAX 16
|
||||
|
|
|
@ -9798,6 +9798,11 @@ ia64_optimization_options (int level ATTRIBUTE_UNUSED,
|
|||
{
|
||||
/* Let the scheduler form additional regions. */
|
||||
set_param_value ("max-sched-extend-regions-iters", 2);
|
||||
|
||||
/* Set the default values for cache-related parameters. */
|
||||
set_param_value ("simultaneous-prefetches", 6);
|
||||
set_param_value ("l1-cache-line-size", 32);
|
||||
|
||||
}
|
||||
|
||||
#include "gt-ia64.h"
|
||||
|
|
|
@ -1979,19 +1979,6 @@ do { \
|
|||
#pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is
|
||||
defined. */
|
||||
|
||||
/* If this architecture supports prefetch, define this to be the number of
|
||||
prefetch commands that can be executed in parallel.
|
||||
|
||||
??? This number is bogus and needs to be replaced before the value is
|
||||
actually used in optimizations. */
|
||||
|
||||
#define SIMULTANEOUS_PREFETCHES 6
|
||||
|
||||
/* If this architecture supports prefetch, define this to be the size of
|
||||
the cache line that is prefetched. */
|
||||
|
||||
#define PREFETCH_BLOCK 32
|
||||
|
||||
#define HANDLE_SYSV_PRAGMA 1
|
||||
|
||||
/* A C expression for the maximum number of instructions to execute via
|
||||
|
|
|
@ -495,6 +495,8 @@ do { \
|
|||
the user explicitly requested this to be on or off. */ \
|
||||
if (flag_schedule_insns > 0) \
|
||||
flag_schedule_insns = 2; \
|
||||
\
|
||||
set_param_value ("simultaneous-prefetches", 2); \
|
||||
} while (0)
|
||||
|
||||
#define ASSEMBLER_DIALECT assembler_dialect
|
||||
|
@ -3467,8 +3469,6 @@ extern int current_function_interrupt;
|
|||
2:\n" TEXT_SECTION_ASM_OP);
|
||||
#endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */
|
||||
|
||||
#define SIMULTANEOUS_PREFETCHES 2
|
||||
|
||||
/* FIXME: middle-end support for highpart optimizations is missing. */
|
||||
#define high_life_started reload_in_progress
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ Boston, MA 02110-1301, USA. */
|
|||
#include "cfglayout.h"
|
||||
#include "tree-gimple.h"
|
||||
#include "langhooks.h"
|
||||
#include "params.h"
|
||||
|
||||
/* Processor costs */
|
||||
static const
|
||||
|
@ -827,6 +828,20 @@ sparc_override_options (void)
|
|||
if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
|
||||
target_flags |= MASK_LONG_DOUBLE_128;
|
||||
#endif
|
||||
|
||||
if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
|
||||
set_param_value ("simultaneous-prefetches",
|
||||
((sparc_cpu == PROCESSOR_ULTRASPARC
|
||||
|| sparc_cpu == PROCESSOR_NIAGARA)
|
||||
? 2
|
||||
: (sparc_cpu == PROCESSOR_ULTRASPARC3
|
||||
? 8 : 3)));
|
||||
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
|
||||
set_param_value ("l1-cache-line-size",
|
||||
((sparc_cpu == PROCESSOR_ULTRASPARC
|
||||
|| sparc_cpu == PROCESSOR_ULTRASPARC3
|
||||
|| sparc_cpu == PROCESSOR_NIAGARA)
|
||||
? 64 : 32));
|
||||
}
|
||||
|
||||
#ifdef SUBTARGET_ATTRIBUTE_TABLE
|
||||
|
|
|
@ -2175,19 +2175,6 @@ do { \
|
|||
: (sparc_cpu == PROCESSOR_NIAGARA \
|
||||
? 4 \
|
||||
: 3)))
|
||||
|
||||
#define PREFETCH_BLOCK \
|
||||
((sparc_cpu == PROCESSOR_ULTRASPARC \
|
||||
|| sparc_cpu == PROCESSOR_ULTRASPARC3 \
|
||||
|| sparc_cpu == PROCESSOR_NIAGARA) \
|
||||
? 64 : 32)
|
||||
|
||||
#define SIMULTANEOUS_PREFETCHES \
|
||||
((sparc_cpu == PROCESSOR_ULTRASPARC \
|
||||
|| sparc_cpu == PROCESSOR_NIAGARA) \
|
||||
? 2 \
|
||||
: (sparc_cpu == PROCESSOR_ULTRASPARC3 \
|
||||
? 8 : 3))
|
||||
|
||||
/* Control the assembler format that we output. */
|
||||
|
||||
|
|
|
@ -6329,6 +6329,21 @@ duplicated when threading jumps.
|
|||
Maximum number of fields in a structure we will treat in
|
||||
a field sensitive manner during pointer analysis.
|
||||
|
||||
@item prefetch-latency
|
||||
Estimate of the average number of instructions that are executed before
|
||||
prefetch finishes. The distance we prefetch ahead is proportional
|
||||
to this constant. Increasing this number may also lead to fewer
|
||||
streams being prefetched (see @option{simultaneous-prefetches}).
|
||||
|
||||
@item simultaneous-prefetches
|
||||
Maximum number of prefetches that can run at the same time.
|
||||
|
||||
@item l1-cache-line-size
|
||||
The size of cache line in L1 cache, in bytes.
|
||||
|
||||
@item l1-cache-size
|
||||
The number of cache lines in L1 cache.
|
||||
|
||||
@end table
|
||||
@end table
|
||||
|
||||
|
|
|
@ -77,7 +77,10 @@ set_param_value (const char *name, int value)
|
|||
compiler_params[i].option,
|
||||
compiler_params[i].max_value);
|
||||
else
|
||||
compiler_params[i].value = value;
|
||||
{
|
||||
compiler_params[i].value = value;
|
||||
compiler_params[i].set = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -594,6 +594,37 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
|
|||
"The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
|
||||
100, 0, 0)
|
||||
|
||||
/* Prefetching and cache-optimizations related parameters. Default values are
|
||||
usually set by machine description. */
|
||||
|
||||
/* The number of insns executed before prefetch is completed. */
|
||||
|
||||
DEFPARAM (PARAM_PREFETCH_LATENCY,
|
||||
"prefetch-latency",
|
||||
"The number of insns executed before prefetch is completed",
|
||||
200, 0, 0)
|
||||
|
||||
/* The number of prefetches that can run at the same time. */
|
||||
|
||||
DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES,
|
||||
"simultaneous-prefetches",
|
||||
"The number of prefetches that can run at the same time",
|
||||
3, 0, 0)
|
||||
|
||||
/* The size of L1 cache in number of cache lines. */
|
||||
|
||||
DEFPARAM (PARAM_L1_CACHE_SIZE,
|
||||
"l1-cache-size",
|
||||
"The size of L1 cache",
|
||||
1024, 0, 0)
|
||||
|
||||
/* The size of L1 cache line in bytes. */
|
||||
|
||||
DEFPARAM (PARAM_L1_CACHE_LINE_SIZE,
|
||||
"l1-cache-line-size",
|
||||
"The size of L1 cache line",
|
||||
32, 0, 0)
|
||||
|
||||
/*
|
||||
Local variables:
|
||||
mode:c
|
||||
|
|
15
gcc/params.h
15
gcc/params.h
|
@ -49,6 +49,9 @@ typedef struct param_info
|
|||
/* The associated value. */
|
||||
int value;
|
||||
|
||||
/* True if the parameter was explicitly set. */
|
||||
bool set;
|
||||
|
||||
/* Minimum acceptable value. */
|
||||
int min_value;
|
||||
|
||||
|
@ -88,6 +91,10 @@ typedef enum compiler_param
|
|||
#define PARAM_VALUE(ENUM) \
|
||||
(compiler_params[(int) ENUM].value)
|
||||
|
||||
/* True if the value of the parameter was explicitly changed. */
|
||||
#define PARAM_SET_P(ENUM) \
|
||||
(compiler_params[(int) ENUM].set)
|
||||
|
||||
/* Macros for the various parameters. */
|
||||
#define SALIAS_MAX_IMPLICIT_FIELDS \
|
||||
PARAM_VALUE (PARAM_SALIAS_MAX_IMPLICIT_FIELDS)
|
||||
|
@ -151,4 +158,12 @@ typedef enum compiler_param
|
|||
((size_t) PARAM_VALUE (PARAM_MAX_FIELDS_FOR_FIELD_SENSITIVE))
|
||||
#define MAX_SCHED_READY_INSNS \
|
||||
PARAM_VALUE (PARAM_MAX_SCHED_READY_INSNS)
|
||||
#define PREFETCH_LATENCY \
|
||||
PARAM_VALUE (PARAM_PREFETCH_LATENCY)
|
||||
#define SIMULTANEOUS_PREFETCHES \
|
||||
PARAM_VALUE (PARAM_SIMULTANEOUS_PREFETCHES)
|
||||
#define L1_CACHE_SIZE \
|
||||
PARAM_VALUE (PARAM_L1_CACHE_SIZE)
|
||||
#define L1_CACHE_LINE_SIZE \
|
||||
PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
|
||||
#endif /* ! GCC_PARAMS_H */
|
||||
|
|
|
@ -387,10 +387,10 @@ const char *user_label_prefix;
|
|||
|
||||
static const param_info lang_independent_params[] = {
|
||||
#define DEFPARAM(ENUM, OPTION, HELP, DEFAULT, MIN, MAX) \
|
||||
{ OPTION, DEFAULT, MIN, MAX, HELP },
|
||||
{ OPTION, DEFAULT, false, MIN, MAX, HELP },
|
||||
#include "params.def"
|
||||
#undef DEFPARAM
|
||||
{ NULL, 0, 0, 0, NULL }
|
||||
{ NULL, 0, false, 0, 0, NULL }
|
||||
};
|
||||
|
||||
/* Output files for assembler code (real compiler output)
|
||||
|
|
|
@ -115,19 +115,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
/* Magic constants follow. These should be replaced by machine specific
|
||||
numbers. */
|
||||
|
||||
/* A number that should roughly correspond to the number of instructions
|
||||
executed before the prefetch is completed. */
|
||||
|
||||
#ifndef PREFETCH_LATENCY
|
||||
#define PREFETCH_LATENCY 200
|
||||
#endif
|
||||
|
||||
/* Number of prefetches that can run at the same time. */
|
||||
|
||||
#ifndef SIMULTANEOUS_PREFETCHES
|
||||
#define SIMULTANEOUS_PREFETCHES 3
|
||||
#endif
|
||||
|
||||
/* True if write can be prefetched by a read prefetch. */
|
||||
|
||||
#ifndef WRITE_CAN_USE_READ_PREFETCH
|
||||
|
@ -140,10 +127,12 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#define READ_CAN_USE_WRITE_PREFETCH 0
|
||||
#endif
|
||||
|
||||
/* Cache line size. Assumed to be a power of two. */
|
||||
/* The size of the block loaded by a single prefetch. Usually, this is
|
||||
the same as cache line size (at the moment, we only consider one level
|
||||
of cache hierarchy). */
|
||||
|
||||
#ifndef PREFETCH_BLOCK
|
||||
#define PREFETCH_BLOCK 32
|
||||
#define PREFETCH_BLOCK L1_CACHE_LINE_SIZE
|
||||
#endif
|
||||
|
||||
/* Do we have a forward hardware sequential prefetching? */
|
||||
|
@ -1026,6 +1015,19 @@ tree_ssa_prefetch_arrays (struct loops *loops)
|
|||
|| PREFETCH_BLOCK == 0)
|
||||
return 0;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Prefetching parameters:\n");
|
||||
fprintf (dump_file, " simultaneous prefetches: %d\n",
|
||||
SIMULTANEOUS_PREFETCHES);
|
||||
fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY);
|
||||
fprintf (dump_file, " L1 cache size: %d (%d bytes)\n",
|
||||
L1_CACHE_SIZE, L1_CACHE_SIZE * L1_CACHE_LINE_SIZE);
|
||||
fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE);
|
||||
fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
initialize_original_copy_tables ();
|
||||
|
||||
if (!built_in_decls[BUILT_IN_PREFETCH])
|
||||
|
|
Loading…
Reference in New Issue