arm-protos.h (tune_params): New structure.
* arm-protos.h (tune_params): New structure. * arm.c (current_tune): New variable. (arm_constant_limit): Delete. (struct processors): Add pointer to the tune parameters. (arm_slowmul_tune): New tuning option. (arm_fastmul_tune, arm_xscale_tune, arm_9e_tune): Likewise. (all_cores): Adjust to pick up the tuning model. (arm_constant_limit): New function. (arm_override_options): Select the appropriate tuning model. Delete initialization of arm_constant_limit. (arm_split_constant): Use the new constant-limit model. (arm_rtx_costs): Pick up the current tuning model. * arm.md (is_strongarm, is_xscale): Delete. * arm-generic.md (load_ldsched_x, load_ldsched): Test explicitly for Xscale variant architectures. (mult_ldsched_strongarm, mult_ldsched): Similarly for StrongARM. From-SVN: r158463
This commit is contained in:
parent
6936c61a5c
commit
1b78f5757a
|
@ -1,6 +1,26 @@
|
|||
2010-04-17 Richard Earnshaw <rearnsha@arm.com>
|
||||
|
||||
* arm-protos.h (tune_params): New structure.
|
||||
* arm.c (current_tune): New variable.
|
||||
(arm_constant_limit): Delete.
|
||||
(struct processors): Add pointer to the tune parameters.
|
||||
(arm_slowmul_tune): New tuning option.
|
||||
(arm_fastmul_tune, arm_xscale_tune, arm_9e_tune): Likewise.
|
||||
(all_cores): Adjust to pick up the tuning model.
|
||||
(arm_constant_limit): New function.
|
||||
(arm_override_options): Select the appropriate tuning model. Delete
|
||||
initialization of arm_constant_limit.
|
||||
(arm_split_constant): Use the new constant-limit model.
|
||||
(arm_rtx_costs): Pick up the current tuning model.
|
||||
* arm.md (is_strongarm, is_xscale): Delete.
|
||||
* arm-generic.md (load_ldsched_x, load_ldsched): Test explicitly
|
||||
for Xscale variant architectures.
|
||||
(mult_ldsched_strongarm, mult_ldsched): Similarly for StrongARM.
|
||||
|
||||
2010-04-17 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
||||
|
||||
* config/arm/arm.c (arm_gen_constant): Remove unused variable can_shift.
|
||||
* config/arm/arm.c (arm_gen_constant): Remove unused variable
|
||||
can_shift.
|
||||
(arm_rtx_costs_1): Remove unused variable extra_cost.
|
||||
(arm_unwind_emit_set): Use variable offset.
|
||||
(thumb1_output_casesi): Remove unused variable flags.
|
||||
|
|
|
@ -104,14 +104,14 @@
|
|||
(and (eq_attr "generic_sched" "yes")
|
||||
(and (eq_attr "ldsched" "yes")
|
||||
(and (eq_attr "type" "load_byte,load1")
|
||||
(eq_attr "is_xscale" "yes"))))
|
||||
(eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
|
||||
"core")
|
||||
|
||||
(define_insn_reservation "load_ldsched" 2
|
||||
(and (eq_attr "generic_sched" "yes")
|
||||
(and (eq_attr "ldsched" "yes")
|
||||
(and (eq_attr "type" "load_byte,load1")
|
||||
(eq_attr "is_xscale" "no"))))
|
||||
(eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
|
||||
"core")
|
||||
|
||||
(define_insn_reservation "load_or_store" 2
|
||||
|
@ -128,14 +128,16 @@
|
|||
(define_insn_reservation "mult_ldsched_strongarm" 3
|
||||
(and (eq_attr "generic_sched" "yes")
|
||||
(and (eq_attr "ldsched" "yes")
|
||||
(and (eq_attr "is_strongarm" "yes")
|
||||
(and (eq_attr "tune"
|
||||
"strongarm,strongarm110,strongarm1100,strongarm1110")
|
||||
(eq_attr "type" "mult"))))
|
||||
"core*2")
|
||||
|
||||
(define_insn_reservation "mult_ldsched" 4
|
||||
(and (eq_attr "generic_sched" "yes")
|
||||
(and (eq_attr "ldsched" "yes")
|
||||
(and (eq_attr "is_strongarm" "no")
|
||||
(and (eq_attr "tune"
|
||||
"!strongarm,strongarm110,strongarm1100,strongarm1110")
|
||||
(eq_attr "type" "mult"))))
|
||||
"core*4")
|
||||
|
||||
|
|
|
@ -213,4 +213,16 @@ extern const char *arm_mangle_type (const_tree);
|
|||
|
||||
extern void arm_order_regs_for_local_alloc (void);
|
||||
|
||||
#ifdef RTX_CODE
|
||||
/* This needs to be here because we need RTX_CODE and similar. */
|
||||
|
||||
/* Parameters describing one processor tuning model.  The model in
   effect is reached through the current_tune pointer.  */
struct tune_params
|
||||
{
|
||||
/* RTX cost callback for this tuning model; arm_rtx_costs calls it
   through current_tune.  */
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
|
||||
/* The maximum number of insns to be used when loading a constant.  */
int constant_limit;
|
||||
};
|
||||
|
||||
extern const struct tune_params *current_tune;
|
||||
#endif /* RTX_CODE */
|
||||
|
||||
#endif /* ! GCC_ARM_PROTOS_H */
|
||||
|
|
|
@ -526,6 +526,9 @@ int making_const_table;
|
|||
/* The processor for which instructions should be scheduled. */
|
||||
enum processor_type arm_tune = arm_none;
|
||||
|
||||
/* The current tuning set. */
|
||||
const struct tune_params *current_tune;
|
||||
|
||||
/* The default processor used if not overridden by commandline. */
|
||||
static enum processor_type arm_default_cpu = arm_none;
|
||||
|
||||
|
@ -698,9 +701,6 @@ unsigned arm_pic_register = INVALID_REGNUM;
|
|||
the next function. */
|
||||
static int after_arm_reorg = 0;
|
||||
|
||||
/* The maximum number of insns to be used when loading a constant. */
|
||||
static int arm_constant_limit = 3;
|
||||
|
||||
static enum arm_pcs arm_pcs_default;
|
||||
|
||||
/* For an explanation of these variables, see final_prescan_insn below. */
|
||||
|
@ -739,7 +739,31 @@ struct processors
|
|||
enum processor_type core;
|
||||
const char *arch;
|
||||
const unsigned long flags;
|
||||
bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
|
||||
const struct tune_params *const tune;
|
||||
};
|
||||
|
||||
/* Tuning model that pairs the arm_slowmul_rtx_costs cost function with
   a constant-synthesis limit of three insns.  */
const struct tune_params arm_slowmul_tune =
|
||||
{
|
||||
/* rtx_costs.  */
arm_slowmul_rtx_costs,
|
||||
/* constant_limit.  */
3
|
||||
};
|
||||
|
||||
/* Tuning model that pairs the arm_fastmul_rtx_costs cost function with
   a constant-synthesis limit of one insn.  */
const struct tune_params arm_fastmul_tune =
|
||||
{
|
||||
/* rtx_costs.  */
arm_fastmul_rtx_costs,
|
||||
/* constant_limit.  */
1
|
||||
};
|
||||
|
||||
/* Tuning model that pairs the arm_xscale_rtx_costs cost function with
   a constant-synthesis limit of two insns.  On XScale the longer
   latency of a load makes it more difficult to achieve a good
   schedule, so it's faster to synthesize constants that can be done
   in two insns.  */
const struct tune_params arm_xscale_tune =
|
||||
{
|
||||
/* rtx_costs.  */
arm_xscale_rtx_costs,
|
||||
/* constant_limit.  */
2
|
||||
};
|
||||
|
||||
/* Tuning model that pairs the arm_9e_rtx_costs cost function with a
   constant-synthesis limit of one insn.  */
const struct tune_params arm_9e_tune =
|
||||
{
|
||||
/* rtx_costs.  */
arm_9e_rtx_costs,
|
||||
/* constant_limit.  */
1
|
||||
};
|
||||
|
||||
/* Not all of these give usefully different compilation alternatives,
|
||||
|
@ -748,7 +772,7 @@ static const struct processors all_cores[] =
|
|||
{
|
||||
/* ARM Cores */
|
||||
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
|
||||
{NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
|
||||
{NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
|
||||
#include "arm-cores.def"
|
||||
#undef ARM_CORE
|
||||
{NULL, arm_none, NULL, 0, NULL}
|
||||
|
@ -757,7 +781,7 @@ static const struct processors all_cores[] =
|
|||
static const struct processors all_architectures[] =
|
||||
{
|
||||
/* ARM Architectures */
|
||||
/* We don't specify rtx_costs here as it will be figured out
|
||||
/* We don't specify tuning costs here as it will be figured out
|
||||
from the core. */
|
||||
|
||||
{"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
|
||||
|
@ -906,6 +930,13 @@ enum tls_reloc {
|
|||
TLS_LE32
|
||||
};
|
||||
|
||||
/* Return the maximum number of insns to be used when loading a
   constant.  When SIZE_P is true the limit is 1; otherwise it is the
   constant_limit of the tuning model that current_tune points to.  */
|
||||
inline static int
|
||||
arm_constant_limit (bool size_p)
|
||||
{
|
||||
/* current_tune is dereferenced without a null check, so a tuning
   model must already have been selected when SIZE_P is false.  */
return size_p ? 1 : current_tune->constant_limit;
|
||||
}
|
||||
|
||||
/* Emit an insn that's a simple single-set. Both the operands must be known
|
||||
to be valid. */
|
||||
inline static rtx
|
||||
|
@ -1446,6 +1477,7 @@ arm_override_options (void)
|
|||
gcc_assert (arm_tune != arm_none);
|
||||
|
||||
tune_flags = all_cores[(int)arm_tune].flags;
|
||||
current_tune = all_cores[(int)arm_tune].tune;
|
||||
|
||||
if (target_fp16_format_name)
|
||||
{
|
||||
|
@ -1842,26 +1874,12 @@ arm_override_options (void)
|
|||
|
||||
if (optimize_size)
|
||||
{
|
||||
arm_constant_limit = 1;
|
||||
|
||||
/* If optimizing for size, bump the number of instructions that we
|
||||
are prepared to conditionally execute (even on a StrongARM). */
|
||||
max_insns_skipped = 6;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* For processors with load scheduling, it never costs more than
|
||||
2 cycles to load a constant, and the load scheduler may well
|
||||
reduce that to 1. */
|
||||
if (arm_ld_sched)
|
||||
arm_constant_limit = 1;
|
||||
|
||||
/* On XScale the longer latency of a load makes it more difficult
|
||||
to achieve a good schedule, so it's faster to synthesize
|
||||
constants that can be done in two insns. */
|
||||
if (arm_tune_xscale)
|
||||
arm_constant_limit = 2;
|
||||
|
||||
/* StrongARM has early execution of branches, so a sequence
|
||||
that is worth skipping is shorter. */
|
||||
if (arm_tune_strongarm)
|
||||
|
@ -2362,7 +2380,8 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
|
|||
&& !cond
|
||||
&& (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
|
||||
1, 0)
|
||||
> arm_constant_limit + (code != SET)))
|
||||
> (arm_constant_limit (optimize_function_for_size_p (cfun))
|
||||
+ (code != SET))))
|
||||
{
|
||||
if (code == SET)
|
||||
{
|
||||
|
@ -7294,9 +7313,9 @@ arm_rtx_costs (rtx x, int code, int outer_code, int *total,
|
|||
return arm_size_rtx_costs (x, (enum rtx_code) code,
|
||||
(enum rtx_code) outer_code, total);
|
||||
else
|
||||
return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
|
||||
(enum rtx_code) outer_code,
|
||||
total, speed);
|
||||
return current_tune->rtx_costs (x, (enum rtx_code) code,
|
||||
(enum rtx_code) outer_code,
|
||||
total, speed);
|
||||
}
|
||||
|
||||
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
|
||||
|
@ -7441,7 +7460,8 @@ arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|||
so it can be ignored. */
|
||||
|
||||
static bool
|
||||
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
|
||||
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
||||
int *total, bool speed)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (x);
|
||||
|
||||
|
|
|
@ -148,13 +148,6 @@
|
|||
; patterns that share the same RTL in both ARM and Thumb code.
|
||||
(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
|
||||
|
||||
; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects
|
||||
; scheduling decisions for the load unit and the multiplier.
|
||||
(define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm")))
|
||||
|
||||
; IS_XSCALE is set to 'yes' when compiling for XScale.
|
||||
(define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_tune_xscale")))
|
||||
|
||||
;; Operand number of an input operand that is shifted. Zero if the
|
||||
;; given instruction does not shift one of its input operands.
|
||||
(define_attr "shift" "" (const_int 0))
|
||||
|
|
Loading…
Reference in New Issue