[multiple changes]
2013-11-14 Julian Brown <julian@codesourcery.com> Joey Ye <joey.ye@arm.com> * config/arm/arm.c (arm_cortex_m_branch_cost): New. (arm_v7m_tune): New. (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a15_tune, arm_cortex_a5_tune, arm_v6m_tune): Add comments for Sched adj cost. * config/arm/arm-cores.def (cortex-m4, cortex-m3): Use arm_v7m_tune. testsuite: 2013-11-14 Joey Ye <joey.ye@arm.com> * gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m. * gcc.dg/tree-ssa/vrp47.c: Likewise. * gcc.dg/tree-ssa/vrp87.c: Likewise. * gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ignore for cortex_m. * gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise. From-SVN: r204778
This commit is contained in:
parent
2430d1e263
commit
571880a0a4
@ -1,3 +1,16 @@
|
||||
2013-11-14 Julian Brown <julian@codesourcery.com>
|
||||
Joey Ye <joey.ye@arm.com>
|
||||
|
||||
* config/arm/arm.c (arm_cortex_m_branch_cost): New.
|
||||
(arm_v7m_tune): New.
|
||||
(arm_slowmul_tune, arm_fastmul_tune,
|
||||
arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
|
||||
arm_cortex_tune, arm_cortex_a15_tune,
|
||||
arm_cortex_a5_tune, arm_v6m_tune): Add comments
|
||||
for Sched adj cost.
|
||||
* config/arm/arm-cores.def (cortex-m4, cortex-m3):
|
||||
Use arm_v7m_tune.
|
||||
|
||||
2013-11-14 Kirill Yukhin <kirill.yukhin@intel.com>
|
||||
|
||||
PR target/57491
|
||||
|
@ -134,8 +134,8 @@ ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
|
||||
ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
|
||||
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, v7m)
|
||||
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, v7m)
|
||||
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
|
||||
ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, v6m)
|
||||
ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, v6m)
|
||||
|
@ -266,6 +266,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
|
||||
static unsigned int arm_autovectorize_vector_sizes (void);
|
||||
static int arm_default_branch_cost (bool, bool);
|
||||
static int arm_cortex_a5_branch_cost (bool, bool);
|
||||
static int arm_cortex_m_branch_cost (bool, bool);
|
||||
|
||||
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||
const unsigned char *sel);
|
||||
@ -1260,7 +1261,7 @@ const struct tune_params arm_slowmul_tune =
|
||||
{
|
||||
arm_slowmul_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
3, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1276,7 +1277,7 @@ const struct tune_params arm_fastmul_tune =
|
||||
{
|
||||
arm_fastmul_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1295,7 +1296,7 @@ const struct tune_params arm_strongarm_tune =
|
||||
{
|
||||
arm_fastmul_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
3, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1327,7 +1328,7 @@ const struct tune_params arm_9e_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1343,7 +1344,7 @@ const struct tune_params arm_v6t2_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1360,7 +1361,7 @@ const struct tune_params arm_cortex_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
&generic_extra_costs,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1392,7 +1393,7 @@ const struct tune_params arm_cortex_a15_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
&cortexa15_extra_costs,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
2, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1411,7 +1412,7 @@ const struct tune_params arm_cortex_a5_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
1, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -1439,13 +1440,36 @@ const struct tune_params arm_cortex_a9_tune =
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
|
||||
cycle to execute each. An LDR from the constant pool also takes two cycles
|
||||
to execute, but mildly increases pipelining opportunity (consecutive
|
||||
loads/stores can be pipelined together, saving one cycle), and may also
|
||||
improve icache utilisation. Hence we prefer the constant pool for such
|
||||
processors. */
|
||||
|
||||
const struct tune_params arm_v7m_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
&generic_extra_costs,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
true, /* Prefer constant pool. */
|
||||
arm_cortex_m_branch_cost,
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{false, false}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
|
||||
arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
|
||||
const struct tune_params arm_v6m_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Sched adj cost. */
|
||||
1, /* Constant limit. */
|
||||
5, /* Max cond insns. */
|
||||
ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
@ -11241,6 +11265,20 @@ arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
|
||||
return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
|
||||
}
|
||||
|
||||
/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
|
||||
on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
|
||||
sequences of non-executed instructions in IT blocks probably take the same
|
||||
amount of time as executed instructions (and the IT instruction itself takes
|
||||
space in icache). This function was experimentally determined to give good
|
||||
results on a popular embedded benchmark. */
|
||||
|
||||
static int
|
||||
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
|
||||
{
|
||||
return (TARGET_32BIT && speed_p) ? 1
|
||||
: arm_default_branch_cost (speed_p, predictable_p);
|
||||
}
|
||||
|
||||
static bool fp_consts_inited = false;
|
||||
|
||||
static REAL_VALUE_TYPE value_fp0;
|
||||
|
@ -1,3 +1,11 @@
|
||||
2013-11-14 Joey Ye <joey.ye@arm.com>
|
||||
|
||||
* gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
|
||||
* gcc.dg/tree-ssa/vrp47.c: Likewise.
|
||||
* gcc.dg/tree-ssa/vrp87.c: Likewise.
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ignore for cortex_m.
|
||||
* gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.
|
||||
|
||||
2013-11-14 Adam Butcher <adam@jessamine.co.uk>
|
||||
|
||||
PR c++/58533
|
||||
|
@ -1,5 +1,9 @@
|
||||
/* { dg-do compile { target { ! "m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* arc*-*-*"} } } */
|
||||
/* { dg-options "-O2 -fdump-tree-forwprop1" } */
|
||||
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
|
||||
leading to two conditional jumps when evaluating an && condition. Forwprop1
|
||||
is not able to optimize this. */
|
||||
/* { dg-skip-if "" { arm_cortex_m } } */
|
||||
|
||||
extern char *frob (void);
|
||||
extern _Bool testit (void);
|
||||
|
@ -59,9 +59,9 @@ bitmap_ior_and_compl (bitmap dst, const_bitmap a, const_bitmap b,
|
||||
code we missed the edge when the first conditional is false
|
||||
(b_elt is zero, which means the second conditional is always
|
||||
zero).  */
|
||||
/* ARM Cortex-M0 defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
|
||||
/* ARM Cortex-M defines LOGICAL_OP_NON_SHORT_CIRCUIT to false,
|
||||
so skip below test. */
|
||||
/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m && arm_thumb1 } } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m } } } } } } */
|
||||
/* MIPS defines LOGICAL_OP_NON_SHORT_CIRCUIT to 0, so we split both
|
||||
"a_elt || b_elt" and "b_elt && kill_elt" into two conditions each,
|
||||
rather than using "(var1 != 0) op (var2 != 0)". Also, as on other targets,
|
||||
|
@ -26,6 +26,8 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
|
||||
oof ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" } } */
|
||||
/* ARM Cortex-M defines LOGICAL_OP_NON_SHORT_CIRCUIT to false,
|
||||
so skip below test. */
|
||||
/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" { target { ! arm_cortex_m } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vrp1" } } */
|
||||
|
||||
|
@ -6,10 +6,10 @@
|
||||
/* { dg-do compile { target { ! "mips*-*-* arc*-*-* s390*-*-* avr-*-* mn10300-*-*" } } } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-dom1 -fdump-tree-vrp2" } */
|
||||
/* { dg-additional-options "-march=i586" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
|
||||
/* Skip on ARM Cortex-M0, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
|
||||
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
|
||||
leading to two conditional jumps when evaluating an && condition. VRP is
|
||||
not able to optimize this. */
|
||||
/* { dg-skip-if "" { arm_cortex_m && arm_thumb1} } */
|
||||
/* { dg-skip-if "" { arm_cortex_m } } */
|
||||
|
||||
int h(int x, int y)
|
||||
{
|
||||
|
@ -2,6 +2,10 @@
|
||||
|
||||
/* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */
|
||||
/* { dg-additional-options "-mbranch-cost=2" { target avr-*-* } } */
|
||||
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
|
||||
leading to two conditional jumps when evaluating an && condition. VRP is
|
||||
not able to optimize this. */
|
||||
/* { dg-skip-if "" { arm_cortex_m } } */
|
||||
|
||||
struct bitmap_head_def;
|
||||
typedef struct bitmap_head_def *bitmap;
|
||||
|
Loading…
Reference in New Issue
Block a user