rs6000: Add new __builtin_vsx_build_pair and __builtin_mma_build_acc built-ins

The __builtin_vsx_assemble_pair and __builtin_mma_assemble_acc built-ins
currently assign their first source operand to the first VSX register
in a pair/quad, their second operand to the second register in a pair/quad, etc.
This is not endian friendly and forces the user to generate different calls
depending on endianness.  In agreement with the POWER LLVM team, we've
decided to lightly deprecate the assemble built-ins and replace them with
"build" built-ins that automatically handle endianness so the same built-in
call can be used for both little-endian and big-endian compiles.  We are not
removing the assemble built-ins, since there is code in the wild that uses
them, but we are removing their documentation to encourage the use of the
new "build" variants.

gcc/
	* config/rs6000/rs6000-builtin.def (build_pair): New built-in.
	(build_acc): Likewise.
	* config/rs6000/rs6000-call.c (mma_expand_builtin): Swap assemble
	source operands in little-endian mode.
	(rs6000_gimple_fold_mma_builtin): Handle VSX_BUILTIN_BUILD_PAIR.
	(mma_init_builtins): Likewise.
	* config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle endianness
	ordering for the MMA assemble and build source operands.
	* doc/extend.texi (__builtin_mma_build_acc, __builtin_vsx_build_pair):
	Document.
	(__builtin_mma_assemble_acc, __builtin_mma_assemble_pair): Remove
	documentation.

gcc/testsuite/
	* gcc.target/powerpc/mma-builtin-4.c (__builtin_vsx_build_pair): Add
	tests.  Update expected counts.
	* gcc.target/powerpc/mma-builtin-5.c (__builtin_mma_build_acc): Add
	tests.  Update expected counts.
This commit is contained in:
Peter Bergner 2021-06-10 13:54:12 -05:00
parent 5ae4a73057
commit 00d07ec6e1
6 changed files with 58 additions and 13 deletions

View File

@ -3265,6 +3265,7 @@ BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc", QUAD, mma_disassemble_acc)
BU_MMA_V2 (DISASSEMBLE_PAIR, "disassemble_pair", PAIR, vsx_disassemble_pair)
BU_COMPAT (VSX_BUILTIN_DISASSEMBLE_PAIR, "mma_disassemble_pair")
BU_MMA_V3 (BUILD_PAIR, "build_pair", MISC, vsx_assemble_pair)
BU_MMA_V3 (ASSEMBLE_PAIR, "assemble_pair", MISC, vsx_assemble_pair)
BU_COMPAT (VSX_BUILTIN_ASSEMBLE_PAIR, "mma_assemble_pair")
BU_MMA_3 (XVBF16GER2, "xvbf16ger2", MISC, mma_xvbf16ger2)
@ -3297,6 +3298,7 @@ BU_MMA_3 (XVI8GER4SPP, "xvi8ger4spp", QUAD, mma_xvi8ger4spp)
BU_MMA_3 (XVI16GER2PP, "xvi16ger2pp", QUAD, mma_xvi16ger2pp)
BU_MMA_3 (XVI16GER2SPP, "xvi16ger2spp", QUAD, mma_xvi16ger2spp)
BU_MMA_5 (BUILD_ACC, "build_acc", MISC, mma_assemble_acc)
BU_MMA_5 (ASSEMBLE_ACC, "assemble_acc", MISC, mma_assemble_acc)
BU_MMA_5 (PMXVF32GER, "pmxvf32ger", MISC, mma_pmxvf32ger)
BU_MMA_5 (PMXVF64GER, "pmxvf64ger", PAIR, mma_pmxvf64ger)

View File

@ -10244,12 +10244,23 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
pat = GEN_FCN (icode) (op[0], op[1]);
break;
case 3:
/* The ASSEMBLE builtin source operands are reversed in little-endian
mode, so reorder them. */
if (fcode == VSX_BUILTIN_ASSEMBLE_PAIR_INTERNAL && !WORDS_BIG_ENDIAN)
std::swap (op[1], op[2]);
pat = GEN_FCN (icode) (op[0], op[1], op[2]);
break;
case 4:
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
break;
case 5:
/* The ASSEMBLE builtin source operands are reversed in little-endian
mode, so reorder them. */
if (fcode == MMA_BUILTIN_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN)
{
std::swap (op[1], op[4]);
std::swap (op[2], op[3]);
}
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
break;
case 6:
@ -11961,7 +11972,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
gcc_unreachable ();
}
if (fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
if (fncode == VSX_BUILTIN_BUILD_PAIR || fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
lhs = make_ssa_name (vector_pair_type_node);
else
lhs = make_ssa_name (vector_quad_type_node);
@ -14293,8 +14304,10 @@ mma_init_builtins (void)
machine_mode mode = insn_data[icode].operand[j].mode;
if (gimple_func && mode == XOmode)
op[nopnds++] = build_pointer_type (vector_quad_type_node);
else if (gimple_func && mode == OOmode
&& d->code == VSX_BUILTIN_ASSEMBLE_PAIR)
else if (gimple_func
&& mode == OOmode
&& (d->code == VSX_BUILTIN_BUILD_PAIR
|| d->code == VSX_BUILTIN_ASSEMBLE_PAIR))
op[nopnds++] = build_pointer_type (vector_pair_type_node);
else
/* MMA uses unsigned types. */

View File

@ -16807,9 +16807,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
gcc_assert (VSX_REGNO_P (REGNO (dst)));
reg_mode = GET_MODE (XVECEXP (src, 0, 0));
for (int i = 0; i < XVECLEN (src, 0); i++)
int nvecs = XVECLEN (src, 0);
for (int i = 0; i < nvecs; i++)
{
rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
}

View File

@ -20723,10 +20723,10 @@ void __builtin_mma_xxmtacc (__vector_quad *);
void __builtin_mma_xxmfacc (__vector_quad *);
void __builtin_mma_xxsetaccz (__vector_quad *);
void __builtin_mma_assemble_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
void __builtin_mma_build_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
void __builtin_mma_disassemble_acc (void *, __vector_quad *);
void __builtin_vsx_assemble_pair (__vector_pair *, vec_t, vec_t);
void __builtin_vsx_build_pair (__vector_pair *, vec_t, vec_t);
void __builtin_vsx_disassemble_pair (void *, __vector_pair *);
vec_t __builtin_vsx_xvcvspbf16 (vec_t);

View File

@ -20,6 +20,14 @@ foo2 (__vector_pair *dst, vec_t *src)
*dst = pair;
}
/* Exercise __builtin_vsx_build_pair: build a __vector_pair in a local
   from the two vectors src[4] and src[0] (passed in that order), then
   copy the resulting pair out through DST.
   NOTE(review): the code generated for this function presumably feeds the
   scan-assembler-times counts in this test's dg-final directives; confirm
   the counts if the body is ever changed.  */
void
foo3 (__vector_pair *dst, vec_t *src)
{
__vector_pair pair;
/* First argument is the destination pair; the remaining two are the
   source vectors.  */
__builtin_vsx_build_pair (&pair, src[4], src[0]);
*dst = pair;
}
void
bar (vec_t *dst, __vector_pair *src)
{
@ -54,8 +62,12 @@ bar2 (vec_t *dst, __vector_pair *src)
# error "__has_builtin (__builtin_mma_disassemble_pair) failed"
#endif
/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
#if !__has_builtin (__builtin_vsx_build_pair)
# error "__has_builtin (__builtin_vsx_build_pair) failed"
#endif
/* { dg-final { scan-assembler-times {\mlxv\M} 6 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */

View File

@ -12,6 +12,14 @@ foo (__vector_quad *dst, vec_t *src)
*dst = acc;
}
/* Exercise __builtin_mma_build_acc: build a __vector_quad accumulator in a
   local from the four vectors src[12], src[8], src[4] and src[0] (passed in
   that order), then copy the resulting accumulator out through DST.
   NOTE(review): the code generated for this function presumably feeds the
   scan-assembler-times counts in this test's dg-final directives; confirm
   the counts if the body is ever changed.  */
void
foo2 (__vector_quad *dst, vec_t *src)
{
__vector_quad acc;
/* First argument is the destination accumulator; the remaining four are
   the source vectors.  */
__builtin_mma_build_acc (&acc, src[12], src[8], src[4], src[0]);
*dst = acc;
}
void
bar (vec_t *dst, __vector_quad *src)
{
@ -23,9 +31,17 @@ bar (vec_t *dst, __vector_quad *src)
dst[12] = res[3];
}
/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
#if !__has_builtin (__builtin_mma_assemble_acc)
# error "__has_builtin (__builtin_mma_assemble_acc) failed"
#endif
#if !__has_builtin (__builtin_mma_build_acc)
# error "__has_builtin (__builtin_mma_build_acc) failed"
#endif
/* { dg-final { scan-assembler-times {\mlxv\M} 8 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 3 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 3 } } */