Enable store fusion on Power10.
gcc/ChangeLog: * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add OPTION_MASK_P10_FUSION_2STORE. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): Enable store fusion for Power10. (is_fusable_store): New. (power10_sched_reorder): Likewise. (rs6000_sched_reorder): Do Power10 specific reordering. (rs6000_sched_reorder2): Likewise. * config/rs6000/rs6000.opt: Add new option. gcc/testsuite/ChangeLog: * gcc.target/powerpc/fusion-p10-stst.c: New test. * gcc.target/powerpc/fusion-p10-stst2.c: New test.
This commit is contained in:
parent
02dbf5d127
commit
d73c44800b
@ -90,7 +90,8 @@
|
||||
| OPTION_MASK_P10_FUSION_2LOGICAL \
|
||||
| OPTION_MASK_P10_FUSION_LOGADD \
|
||||
| OPTION_MASK_P10_FUSION_ADDLOG \
|
||||
| OPTION_MASK_P10_FUSION_2ADD)
|
||||
| OPTION_MASK_P10_FUSION_2ADD \
|
||||
| OPTION_MASK_P10_FUSION_2STORE)
|
||||
|
||||
/* Flags that need to be turned off if -mno-power9-vector. */
|
||||
#define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \
|
||||
@ -143,6 +144,7 @@
|
||||
| OPTION_MASK_P10_FUSION_LOGADD \
|
||||
| OPTION_MASK_P10_FUSION_ADDLOG \
|
||||
| OPTION_MASK_P10_FUSION_2ADD \
|
||||
| OPTION_MASK_P10_FUSION_2STORE \
|
||||
| OPTION_MASK_HTM \
|
||||
| OPTION_MASK_ISEL \
|
||||
| OPTION_MASK_MFCRF \
|
||||
|
@ -4498,6 +4498,10 @@ rs6000_option_override_internal (bool global_init_p)
|
||||
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
|
||||
|
||||
if (TARGET_POWER10
|
||||
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
|
||||
|
||||
/* Turn off vector pair/mma options on non-power10 systems. */
|
||||
else if (!TARGET_POWER10 && TARGET_MMA)
|
||||
{
|
||||
@ -18933,6 +18937,89 @@ power9_sched_reorder2 (rtx_insn **ready, int lastpos)
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
|
||||
/* Determine if INSN is a store to memory that can be fused with a similar
|
||||
adjacent store. */
|
||||
|
||||
static bool
|
||||
is_fusable_store (rtx_insn *insn, rtx *str_mem)
|
||||
{
|
||||
/* Insn must be a non-prefixed base+disp form store. */
|
||||
if (is_store_insn (insn, str_mem)
|
||||
&& get_attr_prefixed (insn) == PREFIXED_NO
|
||||
&& get_attr_update (insn) == UPDATE_NO
|
||||
&& get_attr_indexed (insn) == INDEXED_NO)
|
||||
{
|
||||
/* Further restrictions by mode and size. */
|
||||
if (!MEM_SIZE_KNOWN_P (*str_mem))
|
||||
return false;
|
||||
|
||||
machine_mode mode = GET_MODE (*str_mem);
|
||||
HOST_WIDE_INT size = MEM_SIZE (*str_mem);
|
||||
|
||||
if (INTEGRAL_MODE_P (mode))
|
||||
/* Must be word or dword size. */
|
||||
return (size == 4 || size == 8);
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
/* Must be dword size. */
|
||||
return (size == 8);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Do Power10 specific reordering of the ready list. */
|
||||
|
||||
static int
|
||||
power10_sched_reorder (rtx_insn **ready, int lastpos)
|
||||
{
|
||||
rtx mem1;
|
||||
|
||||
/* Do store fusion during sched2 only. */
|
||||
if (!reload_completed)
|
||||
return cached_can_issue_more;
|
||||
|
||||
/* If the prior insn finished off a store fusion pair then simply
|
||||
reset the counter and return, nothing more to do. */
|
||||
if (load_store_pendulum != 0)
|
||||
{
|
||||
load_store_pendulum = 0;
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
|
||||
/* Try to pair certain store insns to adjacent memory locations
|
||||
so that the hardware will fuse them to a single operation. */
|
||||
if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE
|
||||
&& is_fusable_store (last_scheduled_insn, &mem1))
|
||||
{
|
||||
|
||||
/* A fusable store was just scheduled. Scan the ready list for another
|
||||
store that it can fuse with. */
|
||||
int pos = lastpos;
|
||||
while (pos >= 0)
|
||||
{
|
||||
rtx mem2;
|
||||
/* GPR stores can be ascending or descending offsets, FPR/VSR stores
|
||||
must be ascending only. */
|
||||
if (is_fusable_store (ready[pos], &mem2)
|
||||
&& ((INTEGRAL_MODE_P (GET_MODE (mem1))
|
||||
&& adjacent_mem_locations (mem1, mem2))
|
||||
|| (FLOAT_MODE_P (GET_MODE (mem1))
|
||||
&& (adjacent_mem_locations (mem1, mem2) == mem1))))
|
||||
{
|
||||
/* Found a fusable store. Move it to the end of the ready list
|
||||
so it is scheduled next. */
|
||||
move_to_end_of_ready (ready, pos, lastpos);
|
||||
|
||||
load_store_pendulum = -1;
|
||||
break;
|
||||
}
|
||||
pos--;
|
||||
}
|
||||
}
|
||||
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
|
||||
/* We are about to begin issuing insns for this clock cycle. */
|
||||
|
||||
static int
|
||||
@ -18959,6 +19046,10 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
|
||||
if (rs6000_tune == PROCESSOR_POWER6)
|
||||
load_store_pendulum = 0;
|
||||
|
||||
/* Do Power10 dependent reordering. */
|
||||
if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
|
||||
power10_sched_reorder (ready, n_ready - 1);
|
||||
|
||||
return rs6000_issue_rate ();
|
||||
}
|
||||
|
||||
@ -18980,6 +19071,10 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
|
||||
&& recog_memoized (last_scheduled_insn) >= 0)
|
||||
return power9_sched_reorder2 (ready, *pn_ready - 1);
|
||||
|
||||
/* Do Power10 dependent reordering. */
|
||||
if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
|
||||
return power10_sched_reorder (ready, *pn_ready - 1);
|
||||
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
|
||||
|
@ -514,6 +514,10 @@ mpower10-fusion-2add
|
||||
Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags)
|
||||
Fuse dependent pairs of add or vaddudm instructions for better performance on power10.
|
||||
|
||||
mpower10-fusion-2store
|
||||
Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags)
|
||||
Fuse certain store operations together for better performance on power10.
|
||||
|
||||
mcrypto
|
||||
Target Mask(CRYPTO) Var(rs6000_isa_flags)
|
||||
Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
|
||||
|
31
gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c
Normal file
31
gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c
Normal file
@ -0,0 +1,31 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
/* Verify store fusion is enabled by default for power10.  */
|
||||
|
||||
/* Word-sized stores.  The stores to i[1] and i[2] are separated in the
   source by the store to i[5]; the stw dg-final scans in this file look
   for the stores at offsets 4 and 8 emitted back to back.  */
void fuse_stw (int *i, int a, int b, int c)
|
||||
{
|
||||
i[1] = a;
|
||||
i[5] = b;
|
||||
i[2] = c;
|
||||
}
|
||||
|
||||
/* Stores of long values, same i[1] / i[5] / i[2] ordering.  The std
   dg-final scan (offsets 8 and 16) is restricted to lp64 targets; on
   ilp32 targets the stw scan expects a second match instead.  */
void fuse_std (long *i, long a, long b, long c)
|
||||
{
|
||||
i[1] = a;
|
||||
i[5] = b;
|
||||
i[2] = c;
|
||||
}
|
||||
|
||||
/* Stores of double values, same i[1] / i[5] / i[2] ordering.  The stfd
   dg-final scan in this file looks for the stores at offsets 8 and 16
   emitted back to back.  */
void fuse_stfd (double *i, double a, double b, double c)
|
||||
{
|
||||
i[1] = a;
|
||||
i[5] = b;
|
||||
i[2] = c;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {stw 4,4\(3\)\n\tstw 6,8\(3\)} 1 { target lp64 } } } */
|
||||
/* { dg-final { scan-assembler-times {stw 4,4\(3\)\n\tstw 6,8\(3\)} 2 { target ilp32 } } } */
|
||||
/* { dg-final { scan-assembler-times {std 4,8\(3\)\n\tstd 6,16\(3\)} 1 { target lp64 } } } */
|
||||
/* { dg-final { scan-assembler-times {stfd 1,8\(3\)\n\tstfd 3,16\(3\)} 1 } } */
|
||||
|
30
gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c
Normal file
30
gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c
Normal file
@ -0,0 +1,30 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -mno-power10-fusion -O2" } */
|
||||
|
||||
/* Verify store fusion is disabled by -mno-power10-fusion.  */
|
||||
|
||||
/* Word-sized stores to i[1], i[5] and i[2], in that order.  This file is
   compiled with -mno-power10-fusion, and its stw scan-assembler-not
   directive requires that the stores at offsets 4 and 8 are NOT emitted
   back to back.  */
void fuse_stw (int *i, int a, int b, int c)
|
||||
{
|
||||
i[1] = a;
|
||||
i[5] = b;
|
||||
i[2] = c;
|
||||
}
|
||||
|
||||
/* Stores of long values to i[1], i[5] and i[2], in that order.  The std
   scan-assembler-not directive in this file (lp64 targets only) requires
   that the stores at offsets 8 and 16 are NOT emitted back to back.  */
void fuse_std (long *i, long a, long b, long c)
|
||||
{
|
||||
i[1] = a;
|
||||
i[5] = b;
|
||||
i[2] = c;
|
||||
}
|
||||
|
||||
/* Stores of double values to i[1], i[5] and i[2], in that order.  The
   stfd scan-assembler-not directive in this file requires that the
   stores at offsets 8 and 16 are NOT emitted back to back.  */
void fuse_stfd (double *i, double a, double b, double c)
|
||||
{
|
||||
i[1] = a;
|
||||
i[5] = b;
|
||||
i[2] = c;
|
||||
}
|
||||
|
||||
|
||||
/* { dg-final { scan-assembler-not {stw 4,4\(3\)\n\tstw 6,8\(3\)} } } */
|
||||
/* { dg-final { scan-assembler-not {std 4,8\(3\)\n\tstd 6,16\(3\)} { target lp64 } } } */
|
||||
/* { dg-final { scan-assembler-not {stfd 1,8\(3\)\n\tstfd 3,16\(3\)} } } */
|
Loading…
Reference in New Issue
Block a user