[AArch64] Implement workaround for ARM Cortex-A53 erratum 835769

* config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define.
	(ADJUST_INSN_LENGTH): Define.
	* config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option.
	* config/aarch64/aarch64.c (is_mem_p): New function.
	(is_memory_op): Likewise.
	(aarch64_prev_real_insn): Likewise.
	(is_madd_op): Likewise.
	(dep_between_memop_and_curr): Likewise.
	(aarch64_madd_needs_nop): Likewise.
	(aarch64_final_prescan_insn): Likewise.
	* doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769
	and -mno-fix-cortex-a53-835769 options.

From-SVN: r216075
This commit is contained in:
Kyrylo Tkachov 2014-10-10 12:28:45 +00:00 committed by Kyrylo Tkachov
parent b9696ffb6e
commit 75cf149453
6 changed files with 162 additions and 0 deletions

View File

@ -1,3 +1,19 @@
2014-10-10 Kyrylo Tkachov<kyrylo.tkachov@arm.com>
Ramana Radhakrishnan<ramana.radhakrishnan@arm.com>
* config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define.
(ADJUST_INSN_LENGTH): Define.
* config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option.
* config/aarch64/aarch64.c (is_mem_p): New function.
(is_memory_op): Likewise.
(aarch64_prev_real_insn): Likewise.
(is_madd_op): Likewise.
(dep_between_memop_and_curr): Likewise.
(aarch64_madd_needs_nop): Likewise.
(aarch64_final_prescan_insn): Likewise.
* doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769
and -mno-fix-cortex-a53-835769 options.
2014-10-10 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/63464

View File

@ -308,6 +308,8 @@ aarch64_builtin_vectorized_function (tree fndecl,
extern void aarch64_split_combinev16qi (rtx operands[3]);
extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool aarch64_madd_needs_nop (rtx_insn *);
extern void aarch64_final_prescan_insn (rtx_insn *);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);

View File

@ -7586,6 +7586,128 @@ aarch64_mangle_type (const_tree type)
return NULL;
}
static int
is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
{
return MEM_P (*x);
}
static bool
is_memory_op (rtx_insn *mem_insn)
{
rtx pattern = PATTERN (mem_insn);
return for_each_rtx (&pattern, is_mem_p, NULL);
}
/* Find the first rtx_insn before insn that will generate an assembly
instruction. */
static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
if (!insn)
return NULL;
do
{
insn = prev_real_insn (insn);
}
while (insn && recog_memoized (insn) < 0);
return insn;
}
static bool
is_madd_op (enum attr_type t1)
{
unsigned int i;
/* A number of these may be AArch32 only. */
enum attr_type mlatypes[] = {
TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
};
for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
{
if (t1 == mlatypes[i])
return true;
}
return false;
}
/* Check if there is a register dependency between a load and the insn
for which we hold recog_data. */
static bool
dep_between_memop_and_curr (rtx memop)
{
rtx load_reg;
int opno;
if (!memop)
return false;
if (!REG_P (SET_DEST (memop)))
return false;
load_reg = SET_DEST (memop);
for (opno = 0; opno < recog_data.n_operands; opno++)
{
rtx operand = recog_data.operand[opno];
if (REG_P (operand)
&& reg_overlap_mentioned_p (load_reg, operand))
return true;
}
return false;
}
bool
aarch64_madd_needs_nop (rtx_insn* insn)
{
enum attr_type attr_type;
rtx_insn *prev;
rtx body;
if (!aarch64_fix_a53_err835769)
return false;
if (recog_memoized (insn) < 0)
return false;
attr_type = get_attr_type (insn);
if (!is_madd_op (attr_type))
return false;
prev = aarch64_prev_real_insn (insn);
if (!prev)
return false;
body = single_set (prev);
/* If the previous insn is a memory op and there is no dependency between
it and the madd, emit a nop between them. If we know the previous insn is
a memory op but body is NULL, emit the nop to be safe, it's probably a
load/store pair insn. */
if (is_memory_op (prev)
&& GET_MODE (recog_data.operand[0]) == DImode
&& (!dep_between_memop_and_curr (body)))
return true;
return false;
}
void
aarch64_final_prescan_insn (rtx_insn *insn)
{
if (aarch64_madd_needs_nop (insn))
fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}
/* Return the equivalent letter for size. */
static char
sizetochar (int size)

View File

@ -486,6 +486,15 @@ enum target_cpus
(TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6))
#endif
/* If inserting NOP before a mult-accumulate insn remember to adjust the
length so that conditional branching code is updated appropriately. */
#define ADJUST_INSN_LENGTH(insn, length) \
if (aarch64_madd_needs_nop (insn)) \
length += 4;
#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
aarch64_final_prescan_insn (INSN); \
/* The processor for which instructions should be scheduled. */
extern enum aarch64_processor aarch64_tune;

View File

@ -67,6 +67,10 @@ mgeneral-regs-only
Target Report RejectNegative Mask(GENERAL_REGS_ONLY)
Generate code which uses only the general registers
mfix-cortex-a53-835769
Target Report Var(aarch64_fix_a53_err835769) Init(0)
Workaround for ARM Cortex-A53 Erratum number 835769
mlittle-endian
Target Report RejectNegative InverseMask(BIG_END)
Assume target CPU is configured as little endian

View File

@ -489,6 +489,7 @@ Objective-C and Objective-C++ Dialects}.
-mstrict-align @gol
-momit-leaf-frame-pointer -mno-omit-leaf-frame-pointer @gol
-mtls-dialect=desc -mtls-dialect=traditional @gol
-mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol
-march=@var{name} -mcpu=@var{name} -mtune=@var{name}}
@emph{Adapteva Epiphany Options}
@ -11757,6 +11758,14 @@ of TLS variables. This is the default.
Use traditional TLS as the thread-local storage mechanism for dynamic accesses
of TLS variables.
@item -mfix-cortex-a53-835769
@itemx -mno-fix-cortex-a53-835769
@opindex -mfix-cortex-a53-835769
@opindex -mno-fix-cortex-a53-835769
Enable or disable the workaround for the ARM Cortex-A53 erratum number 835769.
This will involve inserting a NOP instruction between memory instructions and
64-bit integer multiply-accumulate instructions.
@item -march=@var{name}
@opindex march
Specify the name of the target architecture, optionally suffixed by one or