rs6000: Do not allow combining of multiple assemble quads [PR103548]

The compiler will gladly CSE the result of two __builtin_mma_build_acc
calls with the same four vector arguments, leading to illegal MMA
code being generated.  The fix here is to make the mma_assemble_acc
pattern use an unspec_volatile to stop the CSE from happening.

2021-12-14  Peter Bergner  <bergner@linux.ibm.com>

gcc/
	PR target/103548
	* config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE): Rename unspec from this...
	(UNSPEC_VSX_ASSEMBLE): ...to this.
	(UNSPECV_MMA_ASSEMBLE): New unspecv.
	(vsx_assemble_pair): Use UNSPEC_VSX_ASSEMBLE.
	(*vsx_assemble_pair): Likewise.
	(mma_assemble_acc): Use UNSPECV_MMA_ASSEMBLE.
	(*mma_assemble_acc): Likewise.
	* config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle
	UNSPEC_VOLATILE.  Use UNSPEC_VSX_ASSEMBLE and UNSPECV_MMA_ASSEMBLE.

gcc/testsuite/
	PR target/103548
	* gcc.target/powerpc/mma-builtin-10-pair.c: New test.
	* gcc.target/powerpc/mma-builtin-10-quad.c: New test.
This commit is contained in:
Peter Bergner 2021-12-14 14:50:41 -06:00
parent ca39102e10
commit 15c02ab256
4 changed files with 68 additions and 20 deletions

View File

@ -29,7 +29,7 @@
;; Constants for creating unspecs
(define_c_enum "unspec"
[UNSPEC_MMA_ASSEMBLE
[UNSPEC_VSX_ASSEMBLE
UNSPEC_MMA_EXTRACT
UNSPEC_MMA_PMXVBF16GER2
UNSPEC_MMA_PMXVBF16GER2NN
@ -94,7 +94,8 @@
])
(define_c_enum "unspecv"
[UNSPECV_MMA_XXSETACCZ
[UNSPECV_MMA_ASSEMBLE
UNSPECV_MMA_XXSETACCZ
])
;; MMA instructions with 1 accumulator argument
@ -333,7 +334,7 @@
{
rtx src = gen_rtx_UNSPEC (OOmode,
gen_rtvec (2, operands[1], operands[2]),
UNSPEC_MMA_ASSEMBLE);
UNSPEC_VSX_ASSEMBLE);
emit_move_insn (operands[0], src);
DONE;
})
@ -345,7 +346,7 @@
[(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
UNSPEC_MMA_ASSEMBLE))]
UNSPEC_VSX_ASSEMBLE))]
"TARGET_MMA"
"#"
"&& reload_completed"
@ -353,7 +354,7 @@
{
rtx src = gen_rtx_UNSPEC (OOmode,
gen_rtvec (2, operands[1], operands[2]),
UNSPEC_MMA_ASSEMBLE);
UNSPEC_VSX_ASSEMBLE);
rs6000_split_multireg_move (operands[0], src);
DONE;
})
@ -399,10 +400,10 @@
(match_operand:V16QI 4 "mma_assemble_input_operand")]
"TARGET_MMA"
{
rtx src = gen_rtx_UNSPEC (XOmode,
gen_rtvec (4, operands[1], operands[2],
operands[3], operands[4]),
UNSPEC_MMA_ASSEMBLE);
rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
gen_rtvec (4, operands[1], operands[2],
operands[3], operands[4]),
UNSPECV_MMA_ASSEMBLE);
emit_move_insn (operands[0], src);
DONE;
})
@ -412,21 +413,22 @@
(define_insn_and_split "*mma_assemble_acc"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
UNSPEC_MMA_ASSEMBLE))]
(unspec_volatile:XO
[(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
UNSPECV_MMA_ASSEMBLE))]
"TARGET_MMA
&& fpr_reg_operand (operands[0], XOmode)"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx src = gen_rtx_UNSPEC (XOmode,
gen_rtvec (4, operands[1], operands[2],
operands[3], operands[4]),
UNSPEC_MMA_ASSEMBLE);
rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
gen_rtvec (4, operands[1], operands[2],
operands[3], operands[4]),
UNSPECV_MMA_ASSEMBLE);
rs6000_split_multireg_move (operands[0], src);
DONE;
})

View File

@ -27071,9 +27071,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
return;
}
if (GET_CODE (src) == UNSPEC)
if (GET_CODE (src) == UNSPEC
|| GET_CODE (src) == UNSPEC_VOLATILE)
{
gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
|| XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
gcc_assert (REG_P (dst));
if (GET_MODE (src) == XOmode)
gcc_assert (FP_REGNO_P (REGNO (dst)));

View File

@ -0,0 +1,21 @@
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
typedef unsigned char vec_t __attribute__((vector_size(16)));
/* Test body for PR target/103548 (vector pair variant).
   Builds two __vector_pair values from the same two input vectors,
   src[0] and src[1], and stores them to dst[0] and dst[2].  */
void
foo (__vector_pair *dst, vec_t *src)
{
__vector_pair pair0, pair1;
/* Adjacent loads should be combined into one lxvp instruction
and identical build pairs should be combined.  The dg-final
directives after this function accordingly expect exactly one
lxvp, two stxvp, and no single-vector lxv/stxv.  */
__builtin_vsx_build_pair (&pair0, src[0], src[1]);
__builtin_vsx_build_pair (&pair1, src[0], src[1]);
/* Two separate stores, one per destination slot.  */
dst[0] = pair0;
dst[2] = pair1;
}
/* { dg-final { scan-assembler-not {\mlxv\M} } } */
/* { dg-final { scan-assembler-not {\mstxv\M} } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */

View File

@ -0,0 +1,23 @@
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
typedef unsigned char vec_t __attribute__((vector_size(16)));
/* Test body for PR target/103548 (accumulator variant).
   Builds two __vector_quad accumulators from the same four input
   vectors, src[0] through src[3], and stores them to dst[0] and dst[2].  */
void
foo (__vector_quad *dst, vec_t *src)
{
__vector_quad quad0, quad1;
/* Adjacent loads should be combined into two lxvp instructions per
build, and identical build accs should not be combined: sharing the
result of the two builds led to illegal MMA code (PR target/103548).
The dg-final directives after this function therefore expect four
lxvp, two xxmtacc, two xxmfacc and four stxvp.  */
__builtin_mma_build_acc (&quad0, src[0], src[1], src[2], src[3]);
__builtin_mma_build_acc (&quad1, src[0], src[1], src[2], src[3]);
/* Two separate stores, one per destination slot.  */
dst[0] = quad0;
dst[2] = quad1;
}
/* { dg-final { scan-assembler-not {\mlxv\M} } } */
/* { dg-final { scan-assembler-not {\mstxv\M} } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */