[AArch64] Implement workaround for ARM Cortex-A53 erratum 835769
* config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define. (ADJUST_INSN_LENGTH): Define. * config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option. * config/aarch64/aarch64.c (is_mem_p): New function. (is_memory_op): Likewise. (aarch64_prev_real_insn): Likewise. (is_madd_op): Likewise. (dep_between_memop_and_curr): Likewise. (aarch64_madd_needs_nop): Likewise. (aarch64_final_prescan_insn): Likewise. * doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769 and -mno-fix-cortex-a53-835769 options. From-SVN: r216075
This commit is contained in:
parent
b9696ffb6e
commit
75cf149453
@ -1,3 +1,19 @@
|
||||
2014-10-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define.
|
||||
(ADJUST_INSN_LENGTH): Define.
|
||||
* config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option.
|
||||
* config/aarch64/aarch64.c (is_mem_p): New function.
|
||||
(is_memory_op): Likewise.
|
||||
(aarch64_prev_real_insn): Likewise.
|
||||
(is_madd_op): Likewise.
|
||||
(dep_between_memop_and_curr): Likewise.
|
||||
(aarch64_madd_needs_nop): Likewise.
|
||||
(aarch64_final_prescan_insn): Likewise.
|
||||
* doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769
|
||||
and -mno-fix-cortex-a53-835769 options.
|
||||
|
||||
2014-10-10 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/63464
|
||||
|
@ -308,6 +308,8 @@ aarch64_builtin_vectorized_function (tree fndecl,
|
||||
|
||||
extern void aarch64_split_combinev16qi (rtx operands[3]);
|
||||
extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
|
||||
extern bool aarch64_madd_needs_nop (rtx_insn *);
|
||||
extern void aarch64_final_prescan_insn (rtx_insn *);
|
||||
extern bool
|
||||
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
|
||||
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
|
||||
|
@ -7586,6 +7586,128 @@ aarch64_mangle_type (const_tree type)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
|
||||
{
|
||||
return MEM_P (*x);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_memory_op (rtx_insn *mem_insn)
|
||||
{
|
||||
rtx pattern = PATTERN (mem_insn);
|
||||
return for_each_rtx (&pattern, is_mem_p, NULL);
|
||||
}
|
||||
|
||||
/* Find the first rtx_insn before insn that will generate an assembly
|
||||
instruction. */
|
||||
|
||||
static rtx_insn *
|
||||
aarch64_prev_real_insn (rtx_insn *insn)
|
||||
{
|
||||
if (!insn)
|
||||
return NULL;
|
||||
|
||||
do
|
||||
{
|
||||
insn = prev_real_insn (insn);
|
||||
}
|
||||
while (insn && recog_memoized (insn) < 0);
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_madd_op (enum attr_type t1)
|
||||
{
|
||||
unsigned int i;
|
||||
/* A number of these may be AArch32 only. */
|
||||
enum attr_type mlatypes[] = {
|
||||
TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
|
||||
TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
|
||||
TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
|
||||
};
|
||||
|
||||
for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
|
||||
{
|
||||
if (t1 == mlatypes[i])
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if there is a register dependency between a load and the insn
|
||||
for which we hold recog_data. */
|
||||
|
||||
static bool
|
||||
dep_between_memop_and_curr (rtx memop)
|
||||
{
|
||||
rtx load_reg;
|
||||
int opno;
|
||||
|
||||
if (!memop)
|
||||
return false;
|
||||
|
||||
if (!REG_P (SET_DEST (memop)))
|
||||
return false;
|
||||
|
||||
load_reg = SET_DEST (memop);
|
||||
for (opno = 0; opno < recog_data.n_operands; opno++)
|
||||
{
|
||||
rtx operand = recog_data.operand[opno];
|
||||
if (REG_P (operand)
|
||||
&& reg_overlap_mentioned_p (load_reg, operand))
|
||||
return true;
|
||||
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
aarch64_madd_needs_nop (rtx_insn* insn)
|
||||
{
|
||||
enum attr_type attr_type;
|
||||
rtx_insn *prev;
|
||||
rtx body;
|
||||
|
||||
if (!aarch64_fix_a53_err835769)
|
||||
return false;
|
||||
|
||||
if (recog_memoized (insn) < 0)
|
||||
return false;
|
||||
|
||||
attr_type = get_attr_type (insn);
|
||||
if (!is_madd_op (attr_type))
|
||||
return false;
|
||||
|
||||
prev = aarch64_prev_real_insn (insn);
|
||||
if (!prev)
|
||||
return false;
|
||||
|
||||
body = single_set (prev);
|
||||
|
||||
/* If the previous insn is a memory op and there is no dependency between
|
||||
it and the madd, emit a nop between them. If we know the previous insn is
|
||||
a memory op but body is NULL, emit the nop to be safe, it's probably a
|
||||
load/store pair insn. */
|
||||
if (is_memory_op (prev)
|
||||
&& GET_MODE (recog_data.operand[0]) == DImode
|
||||
&& (!dep_between_memop_and_curr (body)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
aarch64_final_prescan_insn (rtx_insn *insn)
|
||||
{
|
||||
if (aarch64_madd_needs_nop (insn))
|
||||
fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
|
||||
}
|
||||
|
||||
|
||||
/* Return the equivalent letter for size. */
|
||||
static char
|
||||
sizetochar (int size)
|
||||
|
@ -486,6 +486,15 @@ enum target_cpus
|
||||
(TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6))
|
||||
#endif
|
||||
|
||||
/* If inserting NOP before a mult-accumulate insn remember to adjust the
   length so that conditional branching code is updated appropriately.
   The body is wrapped in do { } while (0) so that the expansion is a
   single statement and stays correct inside an unbraced if/else.  */
#define ADJUST_INSN_LENGTH(insn, length) \
  do \
    { \
      if (aarch64_madd_needs_nop (insn)) \
        (length) += 4; \
    } \
  while (0)
|
||||
|
||||
/* Forward INSN to aarch64_final_prescan_insn; OPVEC and NOPERANDS are
   not used.  The definition ends on this line: no trailing line
   continuation, so nothing that follows can be spliced into the macro.  */
#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
  aarch64_final_prescan_insn (INSN);
|
||||
|
||||
/* The processor for which instructions should be scheduled. */
|
||||
extern enum aarch64_processor aarch64_tune;
|
||||
|
||||
|
@ -67,6 +67,10 @@ mgeneral-regs-only
|
||||
Target Report RejectNegative Mask(GENERAL_REGS_ONLY)
|
||||
Generate code which uses only the general registers
|
||||
|
||||
mfix-cortex-a53-835769
|
||||
Target Report Var(aarch64_fix_a53_err835769) Init(0)
|
||||
Workaround for ARM Cortex-A53 Erratum number 835769
|
||||
|
||||
mlittle-endian
|
||||
Target Report RejectNegative InverseMask(BIG_END)
|
||||
Assume target CPU is configured as little endian
|
||||
|
@ -489,6 +489,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mstrict-align @gol
|
||||
-momit-leaf-frame-pointer -mno-omit-leaf-frame-pointer @gol
|
||||
-mtls-dialect=desc -mtls-dialect=traditional @gol
|
||||
-mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol
|
||||
-march=@var{name} -mcpu=@var{name} -mtune=@var{name}}
|
||||
|
||||
@emph{Adapteva Epiphany Options}
|
||||
@ -11757,6 +11758,14 @@ of TLS variables. This is the default.
|
||||
Use traditional TLS as the thread-local storage mechanism for dynamic accesses
|
||||
of TLS variables.
|
||||
|
||||
@item -mfix-cortex-a53-835769
|
||||
@itemx -mno-fix-cortex-a53-835769
|
||||
@opindex mfix-cortex-a53-835769
|
||||
@opindex mno-fix-cortex-a53-835769
|
||||
Enable or disable the workaround for the ARM Cortex-A53 erratum number 835769.
|
||||
This will involve inserting a NOP instruction between memory instructions and
|
||||
64-bit integer multiply-accumulate instructions.
|
||||
|
||||
@item -march=@var{name}
|
||||
@opindex march
|
||||
Specify the name of the target architecture, optionally suffixed by one or
|
||||
|
Loading…
Reference in New Issue
Block a user