[multiple changes]

2013-11-14  Julian Brown  <julian@codesourcery.com>
        Joey Ye  <joey.ye@arm.com>

        * config/arm/arm.c (arm_cortex_m_branch_cost): New.
        (arm_v7m_tune): New.
        (arm_slowmul_tune, arm_fastmul_tune,
        arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
        arm_cortex_tune, arm_cortex_a15_tune,
        arm_cortex_a5_tune, arm_v6m_tune): Add comments
        for Sched adj cost.
        * config/arm/arm-cores.def (cortex-m4, cortex-m3):
        Use arm_v7m_tune.

testsuite:
2013-11-14  Joey Ye  <joey.ye@arm.com>

        * gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
        * gcc.dg/tree-ssa/vrp47.c: Likewise.
        * gcc.dg/tree-ssa/vrp87.c: Likewise.
        * gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ingore for cortex_m.
        * gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.

From-SVN: r204778
This commit is contained in:
Joey Ye 2013-11-14 08:38:54 +00:00
parent 2430d1e263
commit 571880a0a4
9 changed files with 85 additions and 16 deletions

View File

@ -1,3 +1,16 @@
2013-11-14 Julian Brown <julian@codesourcery.com>
Joey Ye <joey.ye@arm.com>
* config/arm/arm.c (arm_cortex_m_branch_cost): New.
(arm_v7m_tune): New.
(arm_slowmul_tune, arm_fastmul_tune,
arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
arm_cortex_tune, arm_cortex_a15_tune,
arm_cortex_a5_tune, arm_v6m_tune): Add comments
for Sched adj cost.
* config/arm/arm-cores.def (cortex-m4, cortex-m3):
Use arm_v7m_tune.
2013-11-14 Kirill Yukhin <kirill.yukhin@intel.com>
PR target/57491

View File

@ -134,8 +134,8 @@ ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, v7m)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, v7m)
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, v6m)
ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, v6m)

View File

@ -266,6 +266,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
@ -1260,7 +1261,7 @@ const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
3, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1276,7 +1277,7 @@ const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1295,7 +1296,7 @@ const struct tune_params arm_strongarm_tune =
{
arm_fastmul_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
3, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1327,7 +1328,7 @@ const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1343,7 +1344,7 @@ const struct tune_params arm_v6t2_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1360,7 +1361,7 @@ const struct tune_params arm_cortex_tune =
{
arm_9e_rtx_costs,
&generic_extra_costs,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1392,7 +1393,7 @@ const struct tune_params arm_cortex_a15_tune =
{
arm_9e_rtx_costs,
&cortexa15_extra_costs,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
2, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1411,7 +1412,7 @@ const struct tune_params arm_cortex_a5_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
1, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -1439,13 +1440,36 @@ const struct tune_params arm_cortex_a9_tune =
false /* Prefer Neon for 64-bits bitops. */
};
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
cycle to execute each. An LDR from the constant pool also takes two cycles
to execute, but mildly increases pipelining opportunity (consecutive
loads/stores can be pipelined together, saving one cycle), and may also
improve icache utilisation. Hence we prefer the constant pool for such
processors. */
const struct tune_params arm_v7m_tune =
{
arm_9e_rtx_costs,
&generic_extra_costs,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_cortex_m_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
false /* Prefer Neon for 64-bits bitops. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
const struct tune_params arm_v6m_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@ -11241,6 +11265,20 @@ arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
sequences of non-executed instructions in IT blocks probably take the same
amount of time as executed instructions (and the IT instruction itself takes
space in icache). This function was experimentally determined to give good
results on a popular embedded benchmark. */
static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
return (TARGET_32BIT && speed_p) ? 1
: arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;
static REAL_VALUE_TYPE value_fp0;

View File

@ -1,3 +1,11 @@
2013-11-14 Joey Ye <joey.ye@arm.com>
* gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
* gcc.dg/tree-ssa/vrp47.c: Likewise.
* gcc.dg/tree-ssa/vrp87.c: Likewise.
* gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ingore for cortex_m.
* gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.
2013-11-14 Adam Butcher <adam@jessamine.co.uk>
PR c++/58533

View File

@ -1,5 +1,9 @@
/* { dg-do compile { target { ! "m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* arc*-*-*"} } } */
/* { dg-options "-O2 -fdump-tree-forwprop1" } */
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
leading to two conditional jumps when evaluating an && condition. Forwprop1
is not able to optimize this. */
/* { dg-skip-if "" { arm_cortex_m } } */
extern char *frob (void);
extern _Bool testit (void);

View File

@ -59,9 +59,9 @@ bitmap_ior_and_compl (bitmap dst, const_bitmap a, const_bitmap b,
code we missed the edge when the first conditional is false
(b_elt is zero, which means the second conditional is always
zero. */
/* ARM Cortex-M0 defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
/* ARM Cortex-M defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
so skip below test. */
/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m && arm_thumb1 } } } } } } */
/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m } } } } } } */
/* MIPS defines LOGICAL_OP_NON_SHORT_CIRCUIT to 0, so we split both
"a_elt || b_elt" and "b_elt && kill_elt" into two conditions each,
rather than using "(var1 != 0) op (var2 != 0)". Also, as on other targets,

View File

@ -26,6 +26,8 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
oof ();
}
/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" } } */
/* ARM Cortex-M defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
so skip below test. */
/* { dg-final { { scan-tree-dump-times "Threaded" 1 "vrp1" } || { arm_cortex_m } } } */
/* { dg-final { cleanup-tree-dump "vrp1" } } */

View File

@ -6,10 +6,10 @@
/* { dg-do compile { target { ! "mips*-*-* arc*-*-* s390*-*-* avr-*-* mn10300-*-*" } } } */
/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-dom1 -fdump-tree-vrp2" } */
/* { dg-additional-options "-march=i586" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
/* Skip on ARM Cortex-M0, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
leading to two conditional jumps when evaluating an && condition. VRP is
not able to optimize this. */
/* { dg-skip-if "" { arm_cortex_m && arm_thumb1} } */
/* { dg-skip-if "" { arm_cortex_m } } */
int h(int x, int y)
{

View File

@ -2,6 +2,10 @@
/* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */
/* { dg-additional-options "-mbranch-cost=2" { target avr-*-* } } */
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
leading to two conditional jumps when evaluating an && condition. VRP is
not able to optimize this. */
/* { dg-skip-if "" { arm_cortex_m } } */
struct bitmap_head_def;
typedef struct bitmap_head_def *bitmap;