invoke.texi (-mslow-flash-data): Document new option.
gcc/ChangeLog 2013-11-25 Terry Guo <terry.guo@arm.com> * doc/invoke.texi (-mslow-flash-data): Document new option. * config/arm/arm.opt (mslow-flash-data): New option. * config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare it. * config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools are disabled. (arm_disable_literal_pool): Declare it. * config/arm/arm.c (arm_disable_literal_pool): New variable. (arm_option_override): Handle new option. (thumb2_legitimate_address_p): Don't allow symbol references when literal pools are disabled. (arm_max_const_double_inline_cost): New function. * config/arm/arm.md (types.md): Include it before ... (use_literal_pool): New attribute. (enabled): Use new attribute. (split pattern): Replace symbol+offset with MOVW/MOVT. gcc/testsuite/ChangeLog 2013-11-25 Terry Guo <terry.guo@arm.com> * gcc.target/arm/thumb2-slow-flash-data.c: New. From-SVN: r205342
This commit is contained in:
parent
36ef4e9d58
commit
02231c1350
@ -1,3 +1,22 @@
|
||||
2013-11-25 Terry Guo <terry.guo@arm.com>
|
||||
|
||||
* doc/invoke.texi (-mslow-flash-data): Document new option.
|
||||
* config/arm/arm.opt (mslow-flash-data): New option.
|
||||
* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
|
||||
it.
|
||||
* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
|
||||
are disabled.
|
||||
(arm_disable_literal_pool): Declare it.
|
||||
* config/arm/arm.c (arm_disable_literal_pool): New variable.
|
||||
(arm_option_override): Handle new option.
|
||||
(thumb2_legitimate_address_p): Don't allow symbol references when
|
||||
literal pools are disabled.
|
||||
(arm_max_const_double_inline_cost): New function.
|
||||
* config/arm/arm.md (types.md): Include it before ...
|
||||
(use_literal_pool): New attribute.
|
||||
(enabled): Use new attribute.
|
||||
(split pattern): Replace symbol+offset with MOVW/MOVT.
|
||||
|
||||
2013-11-24 Steven Bosscher <steven@gcc.gnu.org>
|
||||
|
||||
PR bootstrap/59279
|
||||
|
@ -121,6 +121,7 @@ extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx);
|
||||
extern rtx arm_gen_return_addr_mask (void);
|
||||
extern void arm_reload_in_hi (rtx *);
|
||||
extern void arm_reload_out_hi (rtx *);
|
||||
extern int arm_max_const_double_inline_cost (void);
|
||||
extern int arm_const_double_inline_cost (rtx);
|
||||
extern bool arm_const_double_by_parts (rtx);
|
||||
extern bool arm_const_double_by_immediates (rtx);
|
||||
|
@ -869,6 +869,9 @@ int arm_arch_thumb_hwdiv;
|
||||
than core registers. */
|
||||
int prefer_neon_for_64bits = 0;
|
||||
|
||||
/* Nonzero if we shouldn't use literal pools. */
|
||||
bool arm_disable_literal_pool = false;
|
||||
|
||||
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
|
||||
we must report the mode of the memory reference from
|
||||
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
|
||||
@ -2573,6 +2576,16 @@ arm_option_override (void)
|
||||
if (TARGET_APCS_FRAME)
|
||||
flag_shrink_wrap = false;
|
||||
|
||||
/* We only support -mslow-flash-data on armv7-m targets. */
|
||||
if (target_slow_flash_data
|
||||
&& ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
|
||||
|| (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
|
||||
error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
|
||||
|
||||
/* Currently, for slow flash data, we just disable literal pools. */
|
||||
if (target_slow_flash_data)
|
||||
arm_disable_literal_pool = true;
|
||||
|
||||
/* Register global variables with the garbage collector. */
|
||||
arm_add_gc_roots ();
|
||||
}
|
||||
@ -6417,6 +6430,25 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
||||
&& thumb2_legitimate_index_p (mode, xop0, strict_p)));
|
||||
}
|
||||
|
||||
/* Normally we can assign constant values to target registers without
|
||||
the help of constant pool. But there are cases we have to use constant
|
||||
pool like:
|
||||
1) assign a label to a register.
|
||||
2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
|
||||
|
||||
Constant pool access in format:
|
||||
(set (reg r0) (mem (symbol_ref (".LC0"))))
|
||||
will cause the use of literal pool (later in function arm_reorg).
|
||||
So here we mark such format as an invalid format, then the compiler
|
||||
will adjust it into:
|
||||
(set (reg r0) (symbol_ref (".LC0")))
|
||||
(set (reg r0) (mem (reg r0))).
|
||||
No extra register is required, and (mem (reg r0)) won't cause the use
|
||||
of literal pools. */
|
||||
else if (arm_disable_literal_pool && code == SYMBOL_REF
|
||||
&& CONSTANT_POOL_ADDRESS_P (x))
|
||||
return 0;
|
||||
|
||||
else if (GET_MODE_CLASS (mode) != MODE_FLOAT
|
||||
&& code == SYMBOL_REF
|
||||
&& CONSTANT_POOL_ADDRESS_P (x)
|
||||
@ -16222,6 +16254,19 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
|
||||
minipool_fix_tail = fix;
|
||||
}
|
||||
|
||||
/* Return the maximum allowed cost of synthesizing a 64-bit constant inline.
|
||||
Returns the maximum number of insns allowed, or 99 if we always want to synthesize
|
||||
the value. */
|
||||
int
|
||||
arm_max_const_double_inline_cost ()
|
||||
{
|
||||
/* Let the value get synthesized to avoid the use of literal pools. */
|
||||
if (arm_disable_literal_pool)
|
||||
return 99;
|
||||
|
||||
return ((optimize_size || arm_ld_sched) ? 3 : 4);
|
||||
}
|
||||
|
||||
/* Return the cost of synthesizing a 64-bit constant VAL inline.
|
||||
Returns the number of insns needed, or 99 if we don't know how to
|
||||
do it. */
|
||||
|
@ -329,7 +329,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
|
||||
|
||||
/* Should MOVW/MOVT be used in preference to a constant pool. */
|
||||
#define TARGET_USE_MOVT \
|
||||
(arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
|
||||
(arm_arch_thumb2 \
|
||||
&& (arm_disable_literal_pool \
|
||||
|| (!optimize_size && !current_tune->prefer_constant_pool)))
|
||||
|
||||
/* We could use unified syntax for arm mode, but for now we just use it
|
||||
for Thumb-2. */
|
||||
@ -554,6 +556,11 @@ extern int arm_arch_thumb_hwdiv;
|
||||
than core registers. */
|
||||
extern int prefer_neon_for_64bits;
|
||||
|
||||
/* Nonzero if we shouldn't use literal pools. */
|
||||
#ifndef USED_FOR_TARGET
|
||||
extern bool arm_disable_literal_pool;
|
||||
#endif
|
||||
|
||||
#ifndef TARGET_DEFAULT
|
||||
#define TARGET_DEFAULT (MASK_APCS_FRAME)
|
||||
#endif
|
||||
|
@ -82,6 +82,9 @@
|
||||
;; Processor type. This is created automatically from arm-cores.def.
|
||||
(include "arm-tune.md")
|
||||
|
||||
;; Instruction classification types
|
||||
(include "types.md")
|
||||
|
||||
; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
|
||||
; generating ARM code. This is used to control the length of some insn
|
||||
; patterns that share the same RTL in both ARM and Thumb code.
|
||||
@ -191,6 +194,12 @@
|
||||
(const_string "yes")]
|
||||
(const_string "no")))
|
||||
|
||||
(define_attr "use_literal_pool" "no,yes"
|
||||
(cond [(and (eq_attr "type" "f_loads,f_loadd")
|
||||
(match_test "CONSTANT_P (operands[1])"))
|
||||
(const_string "yes")]
|
||||
(const_string "no")))
|
||||
|
||||
; Allows an insn to disable certain alternatives for reasons other than
|
||||
; arch support.
|
||||
(define_attr "insn_enabled" "no,yes"
|
||||
@ -210,6 +219,10 @@
|
||||
(match_test "arm_restrict_it"))
|
||||
(const_string "no")
|
||||
|
||||
(and (eq_attr "use_literal_pool" "yes")
|
||||
(match_test "arm_disable_literal_pool"))
|
||||
(const_string "no")
|
||||
|
||||
(eq_attr "arch_enabled" "no")
|
||||
(const_string "no")
|
||||
|
||||
@ -245,9 +258,6 @@
|
||||
(set_attr "length" "4")
|
||||
(set_attr "pool_range" "250")])
|
||||
|
||||
;; Instruction classification types
|
||||
(include "types.md")
|
||||
|
||||
; Load scheduling, set from the arm_ld_sched variable
|
||||
; initialized by arm_option_override()
|
||||
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
|
||||
@ -6049,7 +6059,7 @@
|
||||
"TARGET_32BIT
|
||||
&& reload_completed
|
||||
&& (arm_const_double_inline_cost (operands[1])
|
||||
<= ((optimize_size || arm_ld_sched) ? 3 : 4))"
|
||||
<= arm_max_const_double_inline_cost ())"
|
||||
[(const_int 0)]
|
||||
"
|
||||
arm_split_constant (SET, SImode, curr_insn,
|
||||
@ -6312,6 +6322,47 @@
|
||||
"
|
||||
)
|
||||
|
||||
;; A normal way to do (symbol + offset) requires three instructions at least
|
||||
;; (depends on how big the offset is) as below:
|
||||
;; movw r0, #:lower16:g
|
||||
;; movt r0, #:upper16:g
|
||||
;; adds r0, #4
|
||||
;;
|
||||
;; A better way would be:
|
||||
;; movw r0, #:lower16:g+4
|
||||
;; movt r0, #:upper16:g+4
|
||||
;;
|
||||
;; The limitation of this way is that the length of offset should be a 16-bit
|
||||
;; signed value, because current assembler only supports REL type relocation for
|
||||
;; such case. If the more powerful RELA type is supported in future, we should
|
||||
;; update this pattern to always use the better way.
|
||||
(define_split
|
||||
[(set (match_operand:SI 0 "arm_general_register_operand" "")
|
||||
(const:SI (plus:SI (match_operand:SI 1 "general_operand" "")
|
||||
(match_operand:SI 2 "const_int_operand" ""))))]
|
||||
"TARGET_THUMB2
|
||||
&& arm_disable_literal_pool
|
||||
&& reload_completed
|
||||
&& GET_CODE (operands[1]) == SYMBOL_REF"
|
||||
[(clobber (const_int 0))]
|
||||
"
|
||||
int offset = INTVAL (operands[2]);
|
||||
|
||||
if (offset < -0x8000 || offset > 0x7fff)
|
||||
{
|
||||
arm_emit_movpair (operands[0], operands[1]);
|
||||
emit_insn (gen_rtx_SET (SImode, operands[0],
|
||||
gen_rtx_PLUS (SImode, operands[0], operands[2])));
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx op = gen_rtx_CONST (SImode,
|
||||
gen_rtx_PLUS (SImode, operands[1], operands[2]));
|
||||
arm_emit_movpair (operands[0], op);
|
||||
}
|
||||
"
|
||||
)
|
||||
|
||||
;; Split symbol_refs at the later stage (after cprop), instead of generating
|
||||
;; movt/movw pair directly at expand. Otherwise corresponding high_sum
|
||||
;; and lo_sum would be merged back into memory load at cprop. However,
|
||||
|
@ -271,3 +271,7 @@ Enable unaligned word and halfword accesses to packed data.
|
||||
mneon-for-64bits
|
||||
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
|
||||
Use Neon to perform 64-bits operations rather than core registers.
|
||||
|
||||
mslow-flash-data
|
||||
Target Report Var(target_slow_flash_data) Init(0)
|
||||
Assume loading data from flash is slower than fetching instructions.
|
||||
|
@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mfix-cortex-m3-ldrd @gol
|
||||
-munaligned-access @gol
|
||||
-mneon-for-64bits @gol
|
||||
-mslow-flash-data @gol
|
||||
-mrestrict-it}
|
||||
|
||||
@emph{AVR Options}
|
||||
@ -12345,6 +12346,13 @@ Enables using Neon to handle scalar 64-bits operations. This is
|
||||
disabled by default since the cost of moving data from core registers
|
||||
to Neon is high.
|
||||
|
||||
@item -mslow-flash-data
|
||||
@opindex mslow-flash-data
|
||||
Assume loading data from flash is slower than fetching instructions.
|
||||
Therefore literal load is minimized for better performance.
|
||||
This option is only supported when compiling for ARMv7 M-profile and
|
||||
is off by default.
|
||||
|
||||
@item -mrestrict-it
|
||||
@opindex mrestrict-it
|
||||
Restricts generation of IT blocks to conform to the rules of ARMv8.
|
||||
|
@ -1,3 +1,7 @@
|
||||
2013-11-25 Terry Guo <terry.guo@arm.com>
|
||||
|
||||
* gcc.target/arm/thumb2-slow-flash-data.c: New.
|
||||
|
||||
2013-11-23 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* gcc.dg/float-exact-1.c: Use dg-add-options ieee.
|
||||
|
74
gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c
Normal file
74
gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c
Normal file
@ -0,0 +1,74 @@
|
||||
/* The option -mslow-flash-data is just for performance tuning, it
|
||||
doesn't totally disable the use of literal pools. But for below
|
||||
simple cases, the use of literal pool should be replaced by
|
||||
movw/movt or read-only constant pool. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_cortex_m } */
|
||||
/* { dg-require-effective-target arm_thumb2_ok } */
|
||||
/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
|
||||
|
||||
float sf;
|
||||
double df;
|
||||
long long l;
|
||||
static char *p = "Hello World";
|
||||
|
||||
float
|
||||
testsf (float *p)
|
||||
{
|
||||
if (*p > 1.1234f)
|
||||
return 2.1234f;
|
||||
else
|
||||
return 3.1234f;
|
||||
}
|
||||
|
||||
double
|
||||
testdf (double *p)
|
||||
{
|
||||
if (*p > 4.1234)
|
||||
return 2.1234;
|
||||
else
|
||||
return 3.1234;
|
||||
}
|
||||
|
||||
long long
|
||||
testll (long long *p)
|
||||
{
|
||||
if (*p > 0x123456789ABCDEFll)
|
||||
return 0x111111111ll;
|
||||
else
|
||||
return 0x222222222ll;
|
||||
}
|
||||
|
||||
char *
|
||||
testchar ()
|
||||
{
|
||||
return p + 4;
|
||||
}
|
||||
|
||||
int
|
||||
foo (int a, int b)
|
||||
{
|
||||
int i;
|
||||
volatile *labelref = &&label1;
|
||||
|
||||
if (a > b)
|
||||
{
|
||||
while (i < b)
|
||||
{
|
||||
a += *labelref;
|
||||
i += 1;
|
||||
}
|
||||
goto *labelref;
|
||||
}
|
||||
else
|
||||
b = b + 3;
|
||||
|
||||
a = a * b;
|
||||
|
||||
label1:
|
||||
return a + b;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movt" 13 } } */
|
||||
/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */
|
Loading…
Reference in New Issue
Block a user