diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f91582b93ce..87991acb905 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2014-10-10 Kyrylo Tkachov + Ramana Radhakrishnan + + * config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define. + (ADJUST_INSN_LENGTH): Define. + * config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option. + * config/aarch64/aarch64.c (is_mem_p): New function. + (is_memory_op): Likewise. + (aarch64_prev_real_insn): Likewise. + (is_madd_op): Likewise. + (dep_between_memop_and_curr): Likewise. + (aarch64_madd_needs_nop): Likewise. + (aarch64_final_prescan_insn): Likewise. + * doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769 + and -mno-fix-cortex-a53-835769 options. + 2014-10-10 Jakub Jelinek PR tree-optimization/63464 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index b5f53d21cdf..c57a467c327 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -308,6 +308,8 @@ aarch64_builtin_vectorized_function (tree fndecl, extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool aarch64_madd_needs_nop (rtx_insn *); +extern void aarch64_final_prescan_insn (rtx_insn *); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 5144c35e29f..76a2480582a 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -7586,6 +7586,146 @@ aarch64_mangle_type (const_tree type) return NULL; } +/* Callback for for_each_rtx: return nonzero if *X is a MEM. */ + +static int +is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + return MEM_P (*x); +} + +/* Return true if for_each_rtx finds a MEM in the pattern of MEM_INSN. */ + +static bool +is_memory_op (rtx_insn *mem_insn) +{ + rtx pattern = PATTERN (mem_insn); + return for_each_rtx (&pattern, is_mem_p, NULL); +} + +/* Find the first rtx_insn before insn that will 
generate an assembly + instruction. */ + +static rtx_insn * +aarch64_prev_real_insn (rtx_insn *insn) +{ + if (!insn) + return NULL; + + do + { + insn = prev_real_insn (insn); + } + while (insn && recog_memoized (insn) < 0); + + return insn; +} + +/* Return true if T1 is one of the multiply-accumulate types listed below. */ + +static bool +is_madd_op (enum attr_type t1) +{ + unsigned int i; + /* A number of these may be AArch32 only. */ + static const enum attr_type mlatypes[] = { + TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD, + TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY, + TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD + }; + + for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++) + { + if (t1 == mlatypes[i]) + return true; + } + + return false; +} + +/* Check if there is a register dependency between a load and the insn + for which we hold recog_data. */ + +static bool +dep_between_memop_and_curr (rtx memop) +{ + rtx load_reg; + int opno; + + if (!memop) + return false; + + if (!REG_P (SET_DEST (memop))) + return false; + + load_reg = SET_DEST (memop); + for (opno = 0; opno < recog_data.n_operands; opno++) + { + rtx operand = recog_data.operand[opno]; + if (REG_P (operand) + && reg_overlap_mentioned_p (load_reg, operand)) + return true; + + } + return false; +} + +/* Return true if the erratum 835769 workaround is enabled and INSN is a + DImode multiply-accumulate whose previous real insn is a memory operation + with no register dependency on INSN, i.e. a NOP must be emitted between + them. */ + +bool +aarch64_madd_needs_nop (rtx_insn *insn) +{ + enum attr_type attr_type; + rtx_insn *prev; + rtx body; + + if (!aarch64_fix_a53_err835769) + return false; + + if (!INSN_P (insn) || recog_memoized (insn) < 0) + return false; + + attr_type = get_attr_type (insn); + if (!is_madd_op (attr_type)) + return false; + + prev = aarch64_prev_real_insn (insn); + /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN, + which may overwrite recog_data. Restore it for INSN before it is read. */ + extract_constrain_insn_cached (insn); + + if (!prev) + return false; + + body = single_set (prev); + + /* If the previous insn is a memory op and there is no dependency between + it and the madd, emit a nop between them. If we know the previous insn is + a memory op but body is NULL, emit the nop to be safe, it's probably a + load/store pair insn. 
*/ + if (is_memory_op (prev) + && GET_MODE (recog_data.operand[0]) == DImode + && (!dep_between_memop_and_curr (body))) + return true; + + return false; + +} + +/* Emit a NOP into the assembly output ahead of INSN when + aarch64_madd_needs_nop reports that the erratum workaround needs one. */ + +void +aarch64_final_prescan_insn (rtx_insn *insn) +{ + if (aarch64_madd_needs_nop (insn)) + fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n"); +} + + /* Return the equivalent letter for size. */ static char sizetochar (int size) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index db950da8f5e..e9e5fd838fc 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -486,6 +486,18 @@ enum target_cpus (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6)) #endif +/* If inserting NOP before a mult-accumulate insn remember to adjust the + length so that conditional branching code is updated appropriately. */ +#define ADJUST_INSN_LENGTH(insn, length) \ + do { \ + if (aarch64_madd_needs_nop (insn)) \ + (length) += 4; \ + } while (0) + +/* Let aarch64_final_prescan_insn emit the workaround NOP for INSN if needed. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + aarch64_final_prescan_insn (INSN); + /* The processor for which instructions should be scheduled. */ extern enum aarch64_processor aarch64_tune; diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index f5a15b7298e..77deb2e7d36 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -67,6 +67,10 @@ mgeneral-regs-only Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Generate code which uses only the general registers +mfix-cortex-a53-835769 +Target Report Var(aarch64_fix_a53_err835769) Init(0) +Workaround for ARM Cortex-A53 Erratum number 835769 + mlittle-endian Target Report RejectNegative InverseMask(BIG_END) Assume target CPU is configured as little endian diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8f3eb16beb2..f1ba77bdabe 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -489,6 +489,7 @@ Objective-C and Objective-C++ Dialects}. 
-mstrict-align @gol -momit-leaf-frame-pointer -mno-omit-leaf-frame-pointer @gol -mtls-dialect=desc -mtls-dialect=traditional @gol +-mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol -march=@var{name} -mcpu=@var{name} -mtune=@var{name}} @emph{Adapteva Epiphany Options} @@ -11757,6 +11758,14 @@ of TLS variables. This is the default. Use traditional TLS as the thread-local storage mechanism for dynamic accesses of TLS variables. +@item -mfix-cortex-a53-835769 +@itemx -mno-fix-cortex-a53-835769 +@opindex mfix-cortex-a53-835769 +@opindex mno-fix-cortex-a53-835769 +Enable or disable the workaround for the ARM Cortex-A53 erratum number 835769. +This involves inserting a NOP instruction between memory instructions and +64-bit integer multiply-accumulate instructions. + @item -march=@var{name} @opindex march Specify the name of the target architecture, optionally suffixed by one or