rs6000: Do not allow combining of multiple assemble quads [PR103548]
The compiler will gladly CSE the result of two __builtin_mma_build_acc calls with the same four vector arguments, leading to illegal MMA code being generated. The fix here is to make the mma_assemble_acc pattern use an unspec_volatile to stop the CSE from happening. 2021-12-14 Peter Bergner <bergner@linux.ibm.com> gcc/ PR target/103548 * config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE): Rename unspec from this... (UNSPEC_VSX_ASSEMBLE): ...to this. (UNSPECV_MMA_ASSEMBLE): New unspecv. (vsx_assemble_pair): Use UNSPEC_VSX_ASSEMBLE. (*vsx_assemble_pair): Likewise. (mma_assemble_acc): Use UNSPECV_MMA_ASSEMBLE. (*mma_assemble_acc): Likewise. * config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle UNSPEC_VOLATILE. Use UNSPEC_VSX_ASSEMBLE and UNSPECV_MMA_ASSEMBLE. gcc/testsuite/ PR target/103548 * gcc.target/powerpc/mma-builtin-10-pair.c: New test. * gcc.target/powerpc/mma-builtin-10-quad.c: New test.
This commit is contained in:
parent
ca39102e10
commit
15c02ab256
@ -29,7 +29,7 @@
|
||||
;; Constants for creating unspecs
|
||||
|
||||
(define_c_enum "unspec"
|
||||
[UNSPEC_MMA_ASSEMBLE
|
||||
[UNSPEC_VSX_ASSEMBLE
|
||||
UNSPEC_MMA_EXTRACT
|
||||
UNSPEC_MMA_PMXVBF16GER2
|
||||
UNSPEC_MMA_PMXVBF16GER2NN
|
||||
@ -94,7 +94,8 @@
|
||||
])
|
||||
|
||||
(define_c_enum "unspecv"
|
||||
[UNSPECV_MMA_XXSETACCZ
|
||||
[UNSPECV_MMA_ASSEMBLE
|
||||
UNSPECV_MMA_XXSETACCZ
|
||||
])
|
||||
|
||||
;; MMA instructions with 1 accumulator argument
|
||||
@ -333,7 +334,7 @@
|
||||
{
|
||||
rtx src = gen_rtx_UNSPEC (OOmode,
|
||||
gen_rtvec (2, operands[1], operands[2]),
|
||||
UNSPEC_MMA_ASSEMBLE);
|
||||
UNSPEC_VSX_ASSEMBLE);
|
||||
emit_move_insn (operands[0], src);
|
||||
DONE;
|
||||
})
|
||||
@ -345,7 +346,7 @@
|
||||
[(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
|
||||
(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
|
||||
UNSPEC_MMA_ASSEMBLE))]
|
||||
UNSPEC_VSX_ASSEMBLE))]
|
||||
"TARGET_MMA"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
@ -353,7 +354,7 @@
|
||||
{
|
||||
rtx src = gen_rtx_UNSPEC (OOmode,
|
||||
gen_rtvec (2, operands[1], operands[2]),
|
||||
UNSPEC_MMA_ASSEMBLE);
|
||||
UNSPEC_VSX_ASSEMBLE);
|
||||
rs6000_split_multireg_move (operands[0], src);
|
||||
DONE;
|
||||
})
|
||||
@ -399,10 +400,10 @@
|
||||
(match_operand:V16QI 4 "mma_assemble_input_operand")]
|
||||
"TARGET_MMA"
|
||||
{
|
||||
rtx src = gen_rtx_UNSPEC (XOmode,
|
||||
gen_rtvec (4, operands[1], operands[2],
|
||||
operands[3], operands[4]),
|
||||
UNSPEC_MMA_ASSEMBLE);
|
||||
rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
|
||||
gen_rtvec (4, operands[1], operands[2],
|
||||
operands[3], operands[4]),
|
||||
UNSPECV_MMA_ASSEMBLE);
|
||||
emit_move_insn (operands[0], src);
|
||||
DONE;
|
||||
})
|
||||
@ -412,21 +413,22 @@
|
||||
|
||||
(define_insn_and_split "*mma_assemble_acc"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
|
||||
UNSPEC_MMA_ASSEMBLE))]
|
||||
(unspec_volatile:XO
|
||||
[(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
|
||||
UNSPECV_MMA_ASSEMBLE))]
|
||||
"TARGET_MMA
|
||||
&& fpr_reg_operand (operands[0], XOmode)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx src = gen_rtx_UNSPEC (XOmode,
|
||||
gen_rtvec (4, operands[1], operands[2],
|
||||
operands[3], operands[4]),
|
||||
UNSPEC_MMA_ASSEMBLE);
|
||||
rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
|
||||
gen_rtvec (4, operands[1], operands[2],
|
||||
operands[3], operands[4]),
|
||||
UNSPECV_MMA_ASSEMBLE);
|
||||
rs6000_split_multireg_move (operands[0], src);
|
||||
DONE;
|
||||
})
|
||||
|
@ -27071,9 +27071,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
return;
|
||||
}
|
||||
|
||||
if (GET_CODE (src) == UNSPEC)
|
||||
if (GET_CODE (src) == UNSPEC
|
||||
|| GET_CODE (src) == UNSPEC_VOLATILE)
|
||||
{
|
||||
gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
|
||||
gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
|
||||
|| XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
|
||||
gcc_assert (REG_P (dst));
|
||||
if (GET_MODE (src) == XOmode)
|
||||
gcc_assert (FP_REGNO_P (REGNO (dst)));
|
||||
|
21
gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
Normal file
21
gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-require-effective-target power10_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
void
|
||||
foo (__vector_pair *dst, vec_t *src)
|
||||
{
|
||||
__vector_pair pair0, pair1;
|
||||
/* Adjacent loads should be combined into one lxvp instruction
|
||||
and identical build pairs should be combined. */
|
||||
__builtin_vsx_build_pair (&pair0, src[0], src[1]);
|
||||
__builtin_vsx_build_pair (&pair1, src[0], src[1]);
|
||||
dst[0] = pair0;
|
||||
dst[2] = pair1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {\mlxv\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mstxv\M} } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
|
23
gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
Normal file
23
gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
Normal file
@ -0,0 +1,23 @@
|
||||
/* { dg-require-effective-target power10_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
void
|
||||
foo (__vector_quad *dst, vec_t *src)
|
||||
{
|
||||
__vector_quad quad0, quad1;
|
||||
/* Adjacent loads should be combined into two lxvp instructions,
|
||||
and identical build accs should not be combined. */
|
||||
__builtin_mma_build_acc (&quad0, src[0], src[1], src[2], src[3]);
|
||||
__builtin_mma_build_acc (&quad1, src[0], src[1], src[2], src[3]);
|
||||
dst[0] = quad0;
|
||||
dst[2] = quad1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {\mlxv\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mstxv\M} } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
|
Loading…
Reference in New Issue
Block a user