rs6000: MMA built-in dies with incorrect sharing of tree nodes error

When expanding our MMA built-ins into gimple, we erroneously reused the
accumulator memory reference for both the source input value and the
destination output value.  This led to a tree sharing error.
The solution is to create separate memory references for the input
and output values.

2020-09-01  Peter Bergner  <bergner@linux.ibm.com>

gcc/
	PR target/96808
	* config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Do not
	reuse accumulator memory reference for source and destination accesses.

gcc/testsuite/
	PR target/96808
	* gcc.target/powerpc/pr96808.c: New test.
This commit is contained in:
Peter Bergner 2020-09-01 13:47:44 -05:00
parent b1850c617b
commit 8bc0f24d7a
2 changed files with 62 additions and 7 deletions

View File

@ -11471,12 +11471,8 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
/* Convert this built-in into an internal version that uses pass-by-value
arguments. The internal built-in follows immediately after this one. */
new_decl = rs6000_builtin_decls[fncode + 1];
tree lhs, mem, op[MAX_MMA_OPERANDS];
tree lhs, op[MAX_MMA_OPERANDS];
tree acc = gimple_call_arg (stmt, 0);
if (TREE_CODE (acc) == PARM_DECL)
mem = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (acc)), acc);
else
mem = build_simple_mem_ref (acc);
push_gimplify_context (true);
if ((attr & RS6000_BTC_QUAD) != 0)
@ -11486,7 +11482,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
op[0] = make_ssa_name (vector_quad_type_node);
for (unsigned i = 1; i < nopnds; i++)
op[i] = gimple_call_arg (stmt, i);
gimplify_assign (op[0], mem, &new_seq);
gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
}
else
{
@ -11536,7 +11532,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
lhs = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, lhs);
gimple_seq_add_stmt (&new_seq, new_call);
gimplify_assign (mem, lhs, &new_seq);
gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (gsi, new_seq, true);

View File

@ -0,0 +1,59 @@
/* PR target/96808 */
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
/* Verify we do not ICE on the tests below. */
/* Accumulator is a plain local variable, so the built-ins are handed &vq.
   NOTE(review): the name suggests this form already compiled before the
   PR96808 fix and acts as a baseline -- confirm.  */
void
old_ok (__vector_quad *dst, vector unsigned char vc)
{
__vector_quad vq;
/* Both built-ins receive the same accumulator address; presumably the
   second one reads and then rewrites the accumulator, which is the
   source/destination pairing the fix is about -- TODO confirm.  */
__builtin_mma_xxsetaccz(&vq);
__builtin_mma_xvf32gerpp(&vq, vc, vc);
/* Copy the local accumulator out through the caller's pointer.  */
*dst = vq;
}
/* Accumulator is an element of a one-dimensional local array, so the
   address passed to the built-ins is an array-element address (&vq[1])
   rather than the address of a plain variable.  */
void
test0 (__vector_quad *dst, vector unsigned char vc)
{
__vector_quad vq[2];
__builtin_mma_xxsetaccz(&vq[1]);
__builtin_mma_xvf32gerpp(&vq[1], vc, vc);
/* Copy the array element out through the caller's pointer.  */
*dst = vq[1];
}
/* Accumulator is an element of a two-dimensional local array, so the
   address passed to the built-ins is a nested array reference
   (&vq[1][1]).  */
void
test1 (__vector_quad *dst, vector unsigned char vc)
{
__vector_quad vq[2][2];
__builtin_mma_xxsetaccz(&vq[1][1]);
__builtin_mma_xvf32gerpp(&vq[1][1], vc, vc);
/* Copy the array element out through the caller's pointer.  */
*dst = vq[1][1];
}
/* Accumulator is a member of a local structure, so the address passed to
   the built-ins is a field address (&vq.acc).  */
void
test2 (__vector_quad *dst, vector unsigned char vc)
{
struct {
/* Leading member placed ahead of the accumulator; presumably there so
   that acc sits at a nonzero offset within the structure -- TODO
   confirm.  */
__vector_quad dummy;
__vector_quad acc;
} vq;
__builtin_mma_xxsetaccz(&vq.acc);
__builtin_mma_xvf32gerpp(&vq.acc, vc, vc);
/* Copy the structure member out through the caller's pointer.  */
*dst = vq.acc;
}
/* Accumulator lives in caller-provided memory: the built-ins operate in
   place on element 1 of the array addressed by the pointer parameter DST,
   with no local accumulator and no separate copy-out.  */
void
test3 (__vector_quad *dst, vector unsigned char vc)
{
__builtin_mma_xxsetaccz(&dst[1]);
__builtin_mma_xvf32gerpp(&dst[1], vc, vc);
}
/* DST is declared as an array of pointers to __vector_quad.  The
   accumulator is element 2 of the array addressed by dst[1], so the
   address passed to the built-ins involves two levels of indexing through
   the parameter (&dst[1][2]).  The built-ins operate in place.  */
void
test4 (__vector_quad *dst[], vector unsigned char vc)
{
__builtin_mma_xxsetaccz(&dst[1][2]);
__builtin_mma_xvf32gerpp(&dst[1][2], vc, vc);
}