params.c (set_param_value): Initialize the "set" field.

* params.c (set_param_value): Initialize the "set" field.
	* params.h (struct param_info): Add "set" field.
	(PARAM_SET_P): New macro.
	(PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE,
	L1_CACHE_LINE_SIZE): New macros.
	* toplev.c (DEFPARAM): Initialize the "set" field.
	* tree-ssa-loop-prefetch.c (PREFETCH_LATENCY,
	SIMULTANEOUS_PREFETCHES): Removed.
	(PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE.
	(tree_ssa_prefetch_arrays): Dump the values of the parameters.
	* config/sparc/sparc.c: Include params.h.
	(sparc_override_options): Set SIMULTANEOUS_PREFETCHES and
	L1_CACHE_LINE_SIZE parameters.
	* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
	Removed.
	* config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
	Removed.
	* config/i386/i386.c: Include params.h.
	(k8_cost): Change default value for SIMULTANEOUS_PREFETCHES.
	(override_options): Set SIMULTANEOUS_PREFETCHES and
	L1_CACHE_LINE_SIZE parameters.
	* config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed.
	(OPTIMIZATION_OPTIONS): Set the SIMULTANEOUS_PREFETCHES parameter.
	* config/ia64/ia64.c (ia64_optimization_options): Set
	SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters.
	* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK):
	Removed.
	* params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES,
	PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params.
	* doc/invoke.texi: Document new params.

From-SVN: r118728
This commit is contained in:
Zdenek Dvorak 2006-11-12 20:17:02 +01:00 committed by Zdenek Dvorak
parent 015e23f400
commit 47eb5b329b
14 changed files with 153 additions and 53 deletions

View File

@ -1,3 +1,37 @@
2006-11-12 Zdenek Dvorak <dvorakz@suse.cz>
* params.c (set_param_value): Initialize the "set" field.
* params.h (struct param_info): Add "set" field.
(PARAM_SET_P): New macro.
(PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE,
L1_CACHE_LINE_SIZE): New macros.
* toplev.c (DEFPARAM): Initialize the "set" field.
* tree-ssa-loop-prefetch.c (PREFETCH_LATENCY,
SIMULTANEOUS_PREFETCHES): Removed.
(PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE.
(tree_ssa_prefetch_arrays): Dump the values of the parameters.
* config/sparc/sparc.c: Include params.h.
(sparc_override_options): Set SIMULTANEOUS_PREFETCHES and
L1_CACHE_LINE_SIZE parameters.
* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
Removed.
* config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
Removed.
* config/i386/i386.c: Include params.h.
(k8_cost): Change default value for SIMULTANEOUS_PREFETCHES.
(override_options): Set SIMULTANEOUS_PREFETCHES and
L1_CACHE_LINE_SIZE parameters.
* config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed.
(OPTIMIZATION_OPTIONS): Set the SIMULTANEOUS_PREFETCHES parameter.
* config/ia64/ia64.c (ia64_optimization_options): Set
SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters.
* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK):
Removed.
* params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES,
PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params.
* doc/invoke.texi: Document new params.
2006-11-12 Roger Sayle <roger@eyesopen.com>
PR tree-optimization/13827

View File

@ -50,6 +50,7 @@ Boston, MA 02110-1301, USA. */
#include "tree-gimple.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "params.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
@ -536,7 +537,12 @@ struct processor_costs k8_cost = {
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
/* New AMD processors never drop prefetches; if they cannot be performed
immediately, they are queued. We set the number of simultaneous prefetches
to a large constant to reflect this (leaving the number of prefetches
completely unlimited is probably not a good idea, as their execution also
takes some time). */
100, /* number of parallel prefetches */
5, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
@ -2063,6 +2069,12 @@ override_options (void)
so it won't slow down the compilation and make x87 code slower. */
if (!TARGET_SCHEDULE)
flag_schedule_insns_after_reload = flag_schedule_insns = 0;
if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
set_param_value ("simultaneous-prefetches",
ix86_cost->simultaneous_prefetches);
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
}
/* switch to the appropriate section for output of DECL.

View File

@ -1739,12 +1739,6 @@ do { \
/* Define this as 1 if `char' should by default be signed; else as 0. */
#define DEFAULT_SIGNED_CHAR 1
/* Number of bytes moved into a data cache for a single prefetch operation. */
#define PREFETCH_BLOCK ix86_cost->prefetch_block
/* Number of prefetch operations that can be done in parallel. */
#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches
/* Max number of bytes we can move from memory to memory
in one reasonably fast instruction. */
#define MOVE_MAX 16

View File

@ -9798,6 +9798,11 @@ ia64_optimization_options (int level ATTRIBUTE_UNUSED,
{
/* Let the scheduler form additional regions. */
set_param_value ("max-sched-extend-regions-iters", 2);
/* Set the default values for cache-related parameters. */
set_param_value ("simultaneous-prefetches", 6);
set_param_value ("l1-cache-line-size", 32);
}
#include "gt-ia64.h"

View File

@ -1979,19 +1979,6 @@ do { \
#pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is
defined. */
/* If this architecture supports prefetch, define this to be the number of
prefetch commands that can be executed in parallel.
??? This number is bogus and needs to be replaced before the value is
actually used in optimizations. */
#define SIMULTANEOUS_PREFETCHES 6
/* If this architecture supports prefetch, define this to be the size of
the cache line that is prefetched. */
#define PREFETCH_BLOCK 32
#define HANDLE_SYSV_PRAGMA 1
/* A C expression for the maximum number of instructions to execute via

View File

@ -495,6 +495,8 @@ do { \
the user explicitly requested this to be on or off. */ \
if (flag_schedule_insns > 0) \
flag_schedule_insns = 2; \
\
set_param_value ("simultaneous-prefetches", 2); \
} while (0)
#define ASSEMBLER_DIALECT assembler_dialect
@ -3467,8 +3469,6 @@ extern int current_function_interrupt;
2:\n" TEXT_SECTION_ASM_OP);
#endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */
#define SIMULTANEOUS_PREFETCHES 2
/* FIXME: middle-end support for highpart optimizations is missing. */
#define high_life_started reload_in_progress

View File

@ -51,6 +51,7 @@ Boston, MA 02110-1301, USA. */
#include "cfglayout.h"
#include "tree-gimple.h"
#include "langhooks.h"
#include "params.h"
/* Processor costs */
static const
@ -827,6 +828,20 @@ sparc_override_options (void)
if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
target_flags |= MASK_LONG_DOUBLE_128;
#endif
if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
set_param_value ("simultaneous-prefetches",
((sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_NIAGARA)
? 2
: (sparc_cpu == PROCESSOR_ULTRASPARC3
? 8 : 3)));
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
set_param_value ("l1-cache-line-size",
((sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_ULTRASPARC3
|| sparc_cpu == PROCESSOR_NIAGARA)
? 64 : 32));
}
#ifdef SUBTARGET_ATTRIBUTE_TABLE

View File

@ -2175,19 +2175,6 @@ do { \
: (sparc_cpu == PROCESSOR_NIAGARA \
? 4 \
: 3)))
#define PREFETCH_BLOCK \
((sparc_cpu == PROCESSOR_ULTRASPARC \
|| sparc_cpu == PROCESSOR_ULTRASPARC3 \
|| sparc_cpu == PROCESSOR_NIAGARA) \
? 64 : 32)
#define SIMULTANEOUS_PREFETCHES \
((sparc_cpu == PROCESSOR_ULTRASPARC \
|| sparc_cpu == PROCESSOR_NIAGARA) \
? 2 \
: (sparc_cpu == PROCESSOR_ULTRASPARC3 \
? 8 : 3))
/* Control the assembler format that we output. */

View File

@ -6329,6 +6329,21 @@ duplicated when threading jumps.
Maximum number of fields in a structure we will treat in
a field sensitive manner during pointer analysis.
@item prefetch-latency
Estimate of the average number of instructions that are executed before
a prefetch finishes. The distance we prefetch ahead is proportional
to this constant. Increasing this number may also lead to fewer
streams being prefetched (see @option{simultaneous-prefetches}).
@item simultaneous-prefetches
Maximum number of prefetches that can run at the same time.
@item l1-cache-line-size
The size of cache line in L1 cache, in bytes.
@item l1-cache-size
The number of cache lines in L1 cache.
@end table
@end table

View File

@ -77,7 +77,10 @@ set_param_value (const char *name, int value)
compiler_params[i].option,
compiler_params[i].max_value);
else
compiler_params[i].value = value;
{
compiler_params[i].value = value;
compiler_params[i].set = true;
}
return;
}

View File

@ -594,6 +594,37 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
"The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
100, 0, 0)
/* Prefetching and cache-optimizations related parameters. Default values are
usually set by machine description. */
/* The number of insns executed before prefetch is completed. */
DEFPARAM (PARAM_PREFETCH_LATENCY,
"prefetch-latency",
"The number of insns executed before prefetch is completed",
200, 0, 0)
/* The number of prefetches that can run at the same time. */
DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES,
"simultaneous-prefetches",
"The number of prefetches that can run at the same time",
3, 0, 0)
/* The size of L1 cache in number of cache lines. */
DEFPARAM (PARAM_L1_CACHE_SIZE,
"l1-cache-size",
"The size of L1 cache",
1024, 0, 0)
/* The size of L1 cache line in bytes. */
DEFPARAM (PARAM_L1_CACHE_LINE_SIZE,
"l1-cache-line-size",
"The size of L1 cache line",
32, 0, 0)
/*
Local variables:
mode:c

View File

@ -49,6 +49,9 @@ typedef struct param_info
/* The associated value. */
int value;
/* True if the parameter was explicitly set. */
bool set;
/* Minimum acceptable value. */
int min_value;
@ -88,6 +91,10 @@ typedef enum compiler_param
#define PARAM_VALUE(ENUM) \
(compiler_params[(int) ENUM].value)
/* True if the value of the parameter was explicitly changed. */
#define PARAM_SET_P(ENUM) \
(compiler_params[(int) ENUM].set)
/* Macros for the various parameters. */
#define SALIAS_MAX_IMPLICIT_FIELDS \
PARAM_VALUE (PARAM_SALIAS_MAX_IMPLICIT_FIELDS)
@ -151,4 +158,12 @@ typedef enum compiler_param
((size_t) PARAM_VALUE (PARAM_MAX_FIELDS_FOR_FIELD_SENSITIVE))
#define MAX_SCHED_READY_INSNS \
PARAM_VALUE (PARAM_MAX_SCHED_READY_INSNS)
#define PREFETCH_LATENCY \
PARAM_VALUE (PARAM_PREFETCH_LATENCY)
#define SIMULTANEOUS_PREFETCHES \
PARAM_VALUE (PARAM_SIMULTANEOUS_PREFETCHES)
#define L1_CACHE_SIZE \
PARAM_VALUE (PARAM_L1_CACHE_SIZE)
#define L1_CACHE_LINE_SIZE \
PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
#endif /* ! GCC_PARAMS_H */

View File

@ -387,10 +387,10 @@ const char *user_label_prefix;
static const param_info lang_independent_params[] = {
#define DEFPARAM(ENUM, OPTION, HELP, DEFAULT, MIN, MAX) \
{ OPTION, DEFAULT, MIN, MAX, HELP },
{ OPTION, DEFAULT, false, MIN, MAX, HELP },
#include "params.def"
#undef DEFPARAM
{ NULL, 0, 0, 0, NULL }
{ NULL, 0, false, 0, 0, NULL }
};
/* Output files for assembler code (real compiler output)

View File

@ -115,19 +115,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
/* Magic constants follow. These should be replaced by machine specific
numbers. */
/* A number that should roughly correspond to the number of instructions
executed before the prefetch is completed. */
#ifndef PREFETCH_LATENCY
#define PREFETCH_LATENCY 200
#endif
/* Number of prefetches that can run at the same time. */
#ifndef SIMULTANEOUS_PREFETCHES
#define SIMULTANEOUS_PREFETCHES 3
#endif
/* True if write can be prefetched by a read prefetch. */
#ifndef WRITE_CAN_USE_READ_PREFETCH
@ -140,10 +127,12 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#define READ_CAN_USE_WRITE_PREFETCH 0
#endif
/* Cache line size. Assumed to be a power of two. */
/* The size of the block loaded by a single prefetch. Usually, this is
the same as cache line size (at the moment, we only consider one level
of cache hierarchy). */
#ifndef PREFETCH_BLOCK
#define PREFETCH_BLOCK 32
#define PREFETCH_BLOCK L1_CACHE_LINE_SIZE
#endif
/* Do we have a forward hardware sequential prefetching? */
@ -1026,6 +1015,19 @@ tree_ssa_prefetch_arrays (struct loops *loops)
|| PREFETCH_BLOCK == 0)
return 0;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Prefetching parameters:\n");
fprintf (dump_file, " simultaneous prefetches: %d\n",
SIMULTANEOUS_PREFETCHES);
fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY);
fprintf (dump_file, " L1 cache size: %d (%d bytes)\n",
L1_CACHE_SIZE, L1_CACHE_SIZE * L1_CACHE_LINE_SIZE);
fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE);
fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK);
fprintf (dump_file, "\n");
}
initialize_original_copy_tables ();
if (!built_in_decls[BUILT_IN_PREFETCH])