From 8b898d4cbd89a6260b93d768d0c2af1d51f28763 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Mon, 12 Jan 2015 15:09:03 +0000 Subject: [PATCH] [ARM] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P * config/arm/arm-protos.h (tune_params): Add fuseable_ops field. * config/arm/arm.c (arm_macro_fusion_p): New function. (aarch_macro_fusion_pair_p): Likewise. (TARGET_SCHED_MACRO_FUSION_P): Define. (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise. (ARM_FUSE_NOTHING): Likewise. (ARM_FUSE_MOVW_MOVT): Likewise. (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune, arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune, arm_cortex_a12_tune, arm_v7m_tune, arm_cortex_m7_tune, arm_v6m_tune, arm_fa726te_tune, arm_cortex_a5_tune): Specify fuseable_ops value. From-SVN: r219470 --- gcc/ChangeLog | 16 +++++ gcc/config/arm/arm-protos.h | 2 + gcc/config/arm/arm.c | 136 +++++++++++++++++++++++++++++++----- 3 files changed, 135 insertions(+), 19 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4cf72062ed7..6587ffc8146 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2015-01-12 Kyrylo Tkachov + + * config/arm/arm-protos.h (tune_params): Add fuseable_ops field. + * config/arm/arm.c (arm_macro_fusion_p): New function. + (aarch_macro_fusion_pair_p): Likewise. + (TARGET_SCHED_MACRO_FUSION_P): Define. + (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise. + (ARM_FUSE_NOTHING): Likewise. + (ARM_FUSE_MOVW_MOVT): Likewise. + (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune, + arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, + arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune, + arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune, + arm_cortex_a12_tune, arm_v7m_tune, arm_cortex_m7_tune, arm_v6m_tune, + arm_fa726te_tune, arm_cortex_a5_tune): Specify fuseable_ops value. + 2015-01-12 H.J. 
Lu PR bootstrap/64561 diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index fc453480c42..320215bcaf6 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -289,6 +289,8 @@ struct tune_params bool string_ops_prefer_neon; /* Maximum number of instructions to inline calls to memset. */ int max_insns_inline_memset; + /* Bitfield encoding the fuseable pairs of instructions. */ + unsigned int fuseable_ops; }; extern const struct tune_params *current_tune; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index b9d7613ba32..a9cc3e11285 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -257,6 +257,7 @@ static void arm_expand_builtin_va_start (tree, rtx); static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); static void arm_option_override (void); static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode); +static bool arm_macro_fusion_p (void); static bool arm_cannot_copy_insn_p (rtx_insn *); static int arm_issue_rate (void); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; @@ -297,6 +298,8 @@ static int arm_cortex_m_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode, const unsigned char *sel); +static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); + static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, tree vectype, int misalign ATTRIBUTE_UNUSED); @@ -404,6 +407,12 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_COMP_TYPE_ATTRIBUTES #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p + +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p + #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes @@ -1647,6 +1656,9 @@ const struct 
cpu_cost_table v7m_extra_costs = } }; +#define ARM_FUSE_NOTHING (0) +#define ARM_FUSE_MOVW_MOVT (1 << 0) + const struct tune_params arm_slowmul_tune = { arm_slowmul_rtx_costs, @@ -1663,7 +1675,8 @@ const struct tune_params arm_slowmul_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_fastmul_tune = @@ -1682,7 +1695,8 @@ const struct tune_params arm_fastmul_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -1704,7 +1718,8 @@ const struct tune_params arm_strongarm_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_xscale_tune = @@ -1723,7 +1738,8 @@ const struct tune_params arm_xscale_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_9e_tune = @@ -1742,7 +1758,8 @@ const struct tune_params arm_9e_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. 
*/ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_v6t2_tune = @@ -1761,7 +1778,8 @@ const struct tune_params arm_v6t2_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -1781,7 +1799,8 @@ const struct tune_params arm_cortex_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a8_tune = @@ -1800,7 +1819,8 @@ const struct tune_params arm_cortex_a8_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ true, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a7_tune = @@ -1819,7 +1839,8 @@ const struct tune_params arm_cortex_a7_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ true, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a15_tune = @@ -1838,7 +1859,8 @@ const struct tune_params arm_cortex_a15_tune = false, /* Prefer Neon for 64-bits bitops. */ true, true, /* Prefer 32-bit encodings. */ true, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. 
*/ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a53_tune = @@ -1857,7 +1879,8 @@ const struct tune_params arm_cortex_a53_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a57_tune = @@ -1876,7 +1899,8 @@ const struct tune_params arm_cortex_a57_tune = false, /* Prefer Neon for 64-bits bitops. */ true, true, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -1898,7 +1922,8 @@ const struct tune_params arm_cortex_a5_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ true, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a9_tune = @@ -1917,7 +1942,8 @@ const struct tune_params arm_cortex_a9_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_cortex_a12_tune = @@ -1936,7 +1962,8 @@ const struct tune_params arm_cortex_a12_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ true, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. 
*/ + ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single @@ -1962,7 +1989,8 @@ const struct tune_params arm_v7m_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; /* Cortex-M7 tuning. */ @@ -1983,7 +2011,8 @@ const struct tune_params arm_cortex_m7_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than @@ -2004,7 +2033,8 @@ const struct tune_params arm_v6m_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ }; const struct tune_params arm_fa726te_tune = @@ -2023,7 +2053,8 @@ const struct tune_params arm_fa726te_tune = false, /* Prefer Neon for 64-bits bitops. */ false, false, /* Prefer 32-bit encodings. */ false, /* Prefer Neon for stringops. */ - 8 /* Maximum insns to inline memset. */ + 8, /* Maximum insns to inline memset. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. 
*/ }; @@ -29157,6 +29188,73 @@ arm_gen_setmem (rtx *operands) return arm_block_set_aligned_non_vect (dstbase, length, value, align); } +/* Implement TARGET_SCHED_MACRO_FUSION_P: return true when the current tuning lists at least one fuseable pair. */ +static bool +arm_macro_fusion_p (void) +{ + return current_tune->fuseable_ops != ARM_FUSE_NOTHING; +} + +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P: return true if PREV followed by CURR is a movw/movt pair that the current tuning asks to fuse. */ +static bool +aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) +{ + rtx set_dest; + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + if (!prev_set + || !curr_set) + return false; + + if (any_condjump_p (curr)) + return false; + + if (!arm_macro_fusion_p ()) + return false; + + if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT) + { + /* We are trying to fuse + movw imm / movt imm + instructions as a group that gets scheduled together. */ + + set_dest = SET_DEST (curr_set); + + if (GET_MODE (set_dest) != SImode) + return false; + + /* We are trying to match: + prev (movw) == (set (reg r0) (const_int imm16)) + curr (movt) == (set (zero_extract (reg r0) + (const_int 16) + (const_int 16)) + (const_int imm16_1)) + or + prev (movw) == (set (reg r0) + (high (symbol_ref ("SYM")))) + curr (movt) == (set (reg r0) + (lo_sum (reg r0) + (symbol_ref ("SYM")))). In both forms CURR must target the register that PREV set; the register operand of the lo_sum is not checked here. */ + if (GET_CODE (set_dest) == ZERO_EXTRACT) + { + if (CONST_INT_P (SET_SRC (curr_set)) + && CONST_INT_P (SET_SRC (prev_set)) + && REG_P (XEXP (set_dest, 0)) + && REG_P (SET_DEST (prev_set)) + && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) + return true; + } + else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM + && REG_P (SET_DEST (curr_set)) + && REG_P (SET_DEST (prev_set)) + && GET_CODE (SET_SRC (prev_set)) == HIGH + && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) + return true; + } + return false; +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT