Add tuning of ldpw for ThunderX.
2016-09-12 Andrew Pinski <apinski@cavium.com> * config/aarch64/aarch64-tuning-flags.def (SLOW_UNALIGNED_LDPW): New tuning option. * config/aarch64/aarch64.c (thunderx_tunings): Enable AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW. (aarch64_operands_ok_for_ldpstp): Return false if AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW and the mode was SImode and the alignment is less than 8 byte. (aarch64_operands_adjust_ok_for_ldpstp): Likewise. 2016-09-12 Andrew Pinski <apinski@cavium.com> * gcc.target/aarch64/thunderxloadpair.c: New testcase. * gcc.target/aarch64/thunderxnoloadpair.c: New testcase. From-SVN: r240102
This commit is contained in:
parent
47f138d178
commit
54700e2e7f
@ -1,3 +1,14 @@
|
||||
2016-09-12 Andrew Pinski <apinski@cavium.com>
|
||||
|
||||
* config/aarch64/aarch64-tuning-flags.def (SLOW_UNALIGNED_LDPW):
|
||||
New tuning option.
|
||||
* config/aarch64/aarch64.c (thunderx_tunings): Enable
|
||||
AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW.
|
||||
(aarch64_operands_ok_for_ldpstp): Return false if
|
||||
AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW and the mode
|
||||
was SImode and the alignment is less than 8 byte.
|
||||
(aarch64_operands_adjust_ok_for_ldpstp): Likewise.
|
||||
|
||||
2016-09-12 Orlando Arias <oarias@knights.ucf.edu>
|
||||
|
||||
PR target/77570
|
||||
|
@ -29,3 +29,8 @@
|
||||
AARCH64_TUNE_ to give an enum name. */
|
||||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
|
||||
|
||||
/* Don't create load/store pairs that are not 8-byte aligned.  That is, if the
|
||||
two load/stores are not at least 8-byte aligned, don't create load/store
|
||||
pairs. */
|
||||
AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
|
||||
|
@ -712,7 +712,7 @@ static const struct tune_params thunderx_tunings =
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
|
||||
};
|
||||
|
||||
static const struct tune_params xgene1_tunings =
|
||||
@ -13629,6 +13629,15 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
|
||||
if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
|
||||
return false;
|
||||
|
||||
/* If we have SImode and slow unaligned ldp,
|
||||
check the alignment to be at least 8 byte. */
|
||||
if (mode == SImode
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
|
||||
&& !optimize_size
|
||||
&& MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
|
||||
return false;
|
||||
|
||||
/* Check if the addresses are in the form of [base+offset]. */
|
||||
extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
|
||||
if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
|
||||
@ -13788,6 +13797,15 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If we have SImode and slow unaligned ldp,
|
||||
check the alignment to be at least 8 byte. */
|
||||
if (mode == SImode
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
|
||||
&& !optimize_size
|
||||
&& MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
|
||||
return false;
|
||||
|
||||
if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
|
||||
rclass_1 = FP_REGS;
|
||||
else
|
||||
|
@ -1,3 +1,8 @@
|
||||
2016-09-12 Andrew Pinski <apinski@cavium.com>
|
||||
|
||||
* gcc.target/aarch64/thunderxloadpair.c: New testcase.
|
||||
* gcc.target/aarch64/thunderxnoloadpair.c: New testcase.
|
||||
|
||||
2016-09-12 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* gcc.dg/compat/scalar-by-value-4_x.c: Also test passing of
|
||||
|
20
gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c
Normal file
20
gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c
Normal file
@ -0,0 +1,20 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mcpu=thunderx" } */
|
||||
|
||||
/* Layout under test: a long long followed by two ints.  The test's own
   note expects a->a to be 8-byte aligned in this layout.  */
struct ldp
|
||||
{
|
||||
long long c;
|
||||
int a, b;
|
||||
};
|
||||
|
||||
|
||||
/* Return the sum of the two adjacent int members of *a.  The dg-final
   directive of this test expects exactly one "ldp w.., w.." in the
   assembly generated for this file.  */
int f(struct ldp *a)
|
||||
{
|
||||
return a->a + a->b;
|
||||
}
|
||||
|
||||
|
||||
/* We know a->a to be 8-byte aligned, so it is profitable
|
||||
to do ldp. */
|
||||
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
|
||||
|
17
gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c
Normal file
17
gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mcpu=thunderx" } */
|
||||
|
||||
/* Layout under test: two ints only.  The test's own note expects a->a to
   be only 4-byte aligned in this layout.  */
struct noldp
|
||||
{
|
||||
int a, b;
|
||||
};
|
||||
|
||||
|
||||
/* Return the sum of the two adjacent int members of *a.  The dg-final
   directive of this test expects no "ldp w.., w.." to appear in the
   assembly generated for this file.  */
int f(struct noldp *a)
|
||||
{
|
||||
return a->a + a->b;
|
||||
}
|
||||
|
||||
/* We know a->a to be only 4-byte aligned, so it is not profitable
|
||||
to do ldp. */
|
||||
/* { dg-final { scan-assembler-not "ldp\tw\[0-9\]+, w\[0-9\]" } } */
|
Loading…
x
Reference in New Issue
Block a user