invoke.texi (-mslow-flash-data): Document new option.

gcc/ChangeLog
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* doc/invoke.texi (-mslow-flash-data): Document new option.
	* config/arm/arm.opt (mslow-flash-data): New option.
	* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
	it.
	* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
	are disabled.
	(arm_disable_literal_pool): Declare it.
	* config/arm/arm.c (arm_disable_literal_pool): New variable.
	(arm_option_override): Handle new option.
	(thumb2_legitimate_address_p): Don't allow symbol references when
	literal pools are disabled.
	(arm_max_const_double_inline_cost): New function.
	* config/arm/arm.md (types.md): Include it before ...
	(use_literal_pool): New attribute.
	(enabled): Use new attribute.
	(split pattern): Replace symbol+offset with MOVW/MOVT.

gcc/testsuite/ChangeLog
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* gcc.target/arm/thumb2-slow-flash-data.c: New.

From-SVN: r205342
Terry Guo <terry.guo@arm.com>  2013-11-25 06:41:20 +00:00, committed by Xuepeng Guo
commit 02231c1350, parent 36ef4e9d58
9 changed files with 218 additions and 5 deletions

gcc/ChangeLog

@@ -1,3 +1,22 @@
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* doc/invoke.texi (-mslow-flash-data): Document new option.
	* config/arm/arm.opt (mslow-flash-data): New option.
	* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
	it.
	* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
	are disabled.
	(arm_disable_literal_pool): Declare it.
	* config/arm/arm.c (arm_disable_literal_pool): New variable.
	(arm_option_override): Handle new option.
	(thumb2_legitimate_address_p): Don't allow symbol references when
	literal pools are disabled.
	(arm_max_const_double_inline_cost): New function.
	* config/arm/arm.md (types.md): Include it before ...
	(use_literal_pool): New attribute.
	(enabled): Use new attribute.
	(split pattern): Replace symbol+offset with MOVW/MOVT.

2013-11-24  Steven Bosscher  <steven@gcc.gnu.org>

	PR bootstrap/59279

gcc/config/arm/arm-protos.h

@@ -121,6 +121,7 @@ extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx);
extern rtx arm_gen_return_addr_mask (void);
extern void arm_reload_in_hi (rtx *);
extern void arm_reload_out_hi (rtx *);
extern int arm_max_const_double_inline_cost (void);
extern int arm_const_double_inline_cost (rtx);
extern bool arm_const_double_by_parts (rtx);
extern bool arm_const_double_by_immediates (rtx);

gcc/config/arm/arm.c

@@ -869,6 +869,9 @@ int arm_arch_thumb_hwdiv;
than core registers. */
int prefer_neon_for_64bits = 0;
/* Nonzero if we shouldn't use literal pools. */
bool arm_disable_literal_pool = false;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
@@ -2573,6 +2576,16 @@ arm_option_override (void)
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
          || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
@@ -6417,6 +6430,25 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
	      && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of the constant pool.  But there are cases where we have to
     use the constant pool, for example:
     1) assigning a label to a register;
     2) sign-extending an 8-bit value to 32 bits and then assigning it
        to a register.
     A constant pool access of the form
       (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of a literal pool (later, in arm_reorg).
     So here we mark such a form as invalid, and the compiler will
     adjust it into:
       (set (reg r0) (symbol_ref (".LC0")))
       (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the
     use of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
@@ -16222,6 +16254,19 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
  minipool_fix_tail = fix;
}

/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}

/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
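To make the two arm.c changes above concrete, here is a small hypothetical C example (not part of the commit; the driver name, CPU selection and exact instruction sequence are assumptions): with -mslow-flash-data, arm_max_const_double_inline_cost returns 99, so a 64-bit constant is synthesized in registers with MOVW/MOVT rather than loaded from a flash-resident literal pool, and thumb2_legitimate_address_p rejects constant-pool addresses so a remaining pool reference is split into an address materialization followed by a load.

/* Hypothetical illustration only; compile with something like:
     arm-none-eabi-gcc -mthumb -mcpu=cortex-m3 -O2 -mslow-flash-data -S lit.c  */

double
half (void)
{
  /* Without -mslow-flash-data this would typically become a literal-pool
     load such as "ldrd rX, rY, .LCn" (a data access to flash).  With the
     option enabled the constant is synthesized inline, roughly:
         movs  r0, #0            @ low word of 0.5
         movw  r1, #0            @ high word built with a
         movt  r1, #16352        @ MOVW/MOVT pair (0x3FE00000)  */
  return 0.5;
}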

gcc/config/arm/arm.h

@@ -329,7 +329,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
/* Should MOVW/MOVT be used in preference to a constant pool.  */
#define TARGET_USE_MOVT \
-  (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
+  (arm_arch_thumb2 \
+   && (arm_disable_literal_pool \
+       || (!optimize_size && !current_tune->prefer_constant_pool)))
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
@@ -554,6 +556,11 @@ extern int arm_arch_thumb_hwdiv;
than core registers. */
extern int prefer_neon_for_64bits;
/* Nonzero if we shouldn't use literal pools. */
#ifndef USED_FOR_TARGET
extern bool arm_disable_literal_pool;
#endif
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif

gcc/config/arm/arm.md

@@ -82,6 +82,9 @@
;; Processor type. This is created automatically from arm-cores.def.
(include "arm-tune.md")
;; Instruction classification types
(include "types.md")
; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
; generating ARM code. This is used to control the length of some insn
; patterns that share the same RTL in both ARM and Thumb code.
@@ -191,6 +194,12 @@
(const_string "yes")]
(const_string "no")))
(define_attr "use_literal_pool" "no,yes"
  (cond [(and (eq_attr "type" "f_loads,f_loadd")
	      (match_test "CONSTANT_P (operands[1])"))
	 (const_string "yes")]
	(const_string "no")))
; Allows an insn to disable certain alternatives for reasons other than
; arch support.
(define_attr "insn_enabled" "no,yes"
@@ -210,6 +219,10 @@
(match_test "arm_restrict_it"))
(const_string "no")
(and (eq_attr "use_literal_pool" "yes")
(match_test "arm_disable_literal_pool"))
(const_string "no")
(eq_attr "arch_enabled" "no")
(const_string "no")
@@ -245,9 +258,6 @@
   (set_attr "length" "4")
   (set_attr "pool_range" "250")])

-;; Instruction classification types
-(include "types.md")
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_option_override()
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
@@ -6049,7 +6059,7 @@
  "TARGET_32BIT
   && reload_completed
   && (arm_const_double_inline_cost (operands[1])
-       <= ((optimize_size || arm_ld_sched) ? 3 : 4))"
+       <= arm_max_const_double_inline_cost ())"
[(const_int 0)]
"
arm_split_constant (SET, SImode, curr_insn,
@@ -6312,6 +6322,47 @@
"
)
;; The normal way to materialize (symbol + offset) requires at least three
;; instructions (depending on how big the offset is), for example:
;;	movw	r0, #:lower16:g
;;	movt	r0, #:upper16:g
;;	adds	r0, #4
;;
;; A better way is:
;;	movw	r0, #:lower16:g+4
;;	movt	r0, #:upper16:g+4
;;
;; The limitation of this approach is that the offset must fit in a 16-bit
;; signed value, because the current assembler only supports REL-type
;; relocations for this case.  If the more powerful RELA type is supported
;; in the future, this pattern should be updated to use the better sequence
;; unconditionally.
(define_split
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
	(const:SI (plus:SI (match_operand:SI 1 "general_operand" "")
			   (match_operand:SI 2 "const_int_operand" ""))))]
  "TARGET_THUMB2
   && arm_disable_literal_pool
   && reload_completed
   && GET_CODE (operands[1]) == SYMBOL_REF"
  [(clobber (const_int 0))]
  "
    int offset = INTVAL (operands[2]);

    if (offset < -0x8000 || offset > 0x7fff)
      {
	arm_emit_movpair (operands[0], operands[1]);
	emit_insn (gen_rtx_SET (SImode, operands[0],
				gen_rtx_PLUS (SImode, operands[0], operands[2])));
      }
    else
      {
	rtx op = gen_rtx_CONST (SImode,
				gen_rtx_PLUS (SImode, operands[1], operands[2]));
	arm_emit_movpair (operands[0], op);
      }
  "
)
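As a concrete illustration of this split (a hypothetical example, not part of the commit; the exact output is an assumption and depends on options and tuning): taking the address of an element inside a global object yields a (symbol + offset) constant, and when the offset fits in a signed 16-bit value it is folded directly into the MOVW/MOVT relocations.

/* Hypothetical example; expected Thumb-2 output sketched in the comments
   for -O2 -mthumb -mslow-flash-data on an ARMv7-M target.  */

extern int g[16];

int *
addr_of_g1 (void)
{
  /* Small offset, folded into the relocations:
         movw  r0, #:lower16:g+4
         movt  r0, #:upper16:g+4
     An offset outside the signed 16-bit range would instead be added with
     a separate instruction after the MOVW/MOVT pair.  */
  return &g[1];
}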
;; Split symbol_refs at the later stage (after cprop), instead of generating
;; movt/movw pair directly at expand. Otherwise corresponding high_sum
;; and lo_sum would be merged back into memory load at cprop. However,

gcc/config/arm/arm.opt

@@ -271,3 +271,7 @@ Enable unaligned word and halfword accesses to packed data.
mneon-for-64bits
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
Use Neon to perform 64-bits operations rather than core registers.

mslow-flash-data
Target Report Var(target_slow_flash_data) Init(0)
Assume loading data from flash is slower than fetching instructions.

gcc/doc/invoke.texi

@@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}.
-mfix-cortex-m3-ldrd @gol
-munaligned-access @gol
-mneon-for-64bits @gol
-mslow-flash-data @gol
-mrestrict-it}
@emph{AVR Options}
@@ -12345,6 +12346,13 @@ Enables using Neon to handle scalar 64-bits operations. This is
disabled by default since the cost of moving data from core registers
to Neon is high.

@item -mslow-flash-data
@opindex mslow-flash-data
Assume loading data from flash is slower than fetching instructions.
Therefore literal loads are minimized for better performance.
This option is only supported when compiling for ARMv7 M-profile and
is off by default.
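
A minimal usage sketch of the new option (hypothetical; the file name, target CPU, and generated code are assumptions, and the exact effect depends on the tuning of the selected core):

/* lit.c -- compile with, for example:
     arm-none-eabi-gcc -mcpu=cortex-m3 -mthumb -O2 -mslow-flash-data -S lit.c
   Depending on the core's tuning, the constant below may otherwise be
   placed in a literal pool in flash; with -mslow-flash-data it is built
   with a MOVW/MOVT pair instead.  */

unsigned int
magic (void)
{
  return 0xDEADBEEFu;
}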
@item -mrestrict-it
@opindex mrestrict-it
Restricts generation of IT blocks to conform to the rules of ARMv8.

gcc/testsuite/ChangeLog

@@ -1,3 +1,7 @@
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* gcc.target/arm/thumb2-slow-flash-data.c: New.

2013-11-23  Uros Bizjak  <ubizjak@gmail.com>

	* gcc.dg/float-exact-1.c: Use dg-add-options ieee.

gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c (new file)

@@ -0,0 +1,74 @@
/* The option -mslow-flash-data is just for performance tuning; it
   doesn't totally disable the use of literal pools.  But for the
   simple cases below, the literal pool should be replaced by
   movw/movt pairs or a read-only constant pool.  */

/* { dg-do compile } */
/* { dg-require-effective-target arm_cortex_m } */
/* { dg-require-effective-target arm_thumb2_ok } */
/* { dg-options "-O2 -mthumb -mslow-flash-data" } */

float sf;
double df;
long long l;
static char *p = "Hello World";

float
testsf (float *p)
{
  if (*p > 1.1234f)
    return 2.1234f;
  else
    return 3.1234f;
}

double
testdf (double *p)
{
  if (*p > 4.1234)
    return 2.1234;
  else
    return 3.1234;
}

long long
testll (long long *p)
{
  if (*p > 0x123456789ABCDEFll)
    return 0x111111111ll;
  else
    return 0x222222222ll;
}

char *
testchar ()
{
  return p + 4;
}

int
foo (int a, int b)
{
  int i;
  volatile *labelref = &&label1;

  if (a > b)
    {
      while (i < b)
	{
	  a += *labelref;
	  i += 1;
	}
      goto *labelref;
    }
  else
    b = b + 3;

  a = a * b;

label1:
  return a + b;
}

/* { dg-final { scan-assembler-times "movt" 13 } } */
/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */