tree-optimization/94988 - enhance SM some more

This enhances store-order preserving store motion to handle the case
of non-invariant dependent stores in the sequence of unconditionally
executed stores on exit by re-issuing them as part of the sequence
of stores on the exit.  This fixes the observed regression of
gcc.target/i386/pr64110.c which relies on store-motion of 'b'
for a loop like

  for (int i = 0; i < j; ++i)
    *b++ = x;

where for correctness we now no longer apply store-motion.  With
the patch we emit the correct

  tem = b;
  for (int i = 0; i < j; ++i)
    {
      tem = tem + 1;
      *tem = x;
    }
  b = tem;
  *tem = x;

preserving the original order of stores.  A testcase reflecting
the miscompilation done by earlier GCC is added as well.

This also fixes the reported ICE in PR95025 and adds checking code
to catch it earlier - the issue was not-supported refs propagation
leaving stray refs in the sequence.

2020-05-11  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/94988
	PR tree-optimization/95025
	* tree-ssa-loop-im.c (seq_entry): Make a struct, add from.
	(sm_seq_push_down): Take extra parameter denoting where we
	moved the ref to.
	(execute_sm_exit): Re-issue sm_other stores in the correct
	order.
	(sm_seq_valid_bb): When always executed, allow sm_other to
	prevail in between sm_ord and record their stored value.
	(hoist_memory_references): Adjust refs_not_supported propagation
	and prune sm_other from the end of the ordered sequences.

	* gcc.dg/torture/pr94988.c: New testcase.
	* gcc.dg/torture/pr95025.c: Likewise.
	* gcc.dg/torture/pr95045.c: Likewise.
	* g++.dg/asan/pr95025.C: New testcase.
This commit is contained in:
Richard Biener 2020-05-08 12:03:30 +02:00
parent 892c7427ee
commit b6ff3ddecf
7 changed files with 241 additions and 49 deletions

View File

@ -1,3 +1,17 @@
2020-05-11 Richard Biener <rguenther@suse.de>
PR tree-optimization/94988
PR tree-optimization/95025
* tree-ssa-loop-im.c (seq_entry): Make a struct, add from.
(sm_seq_push_down): Take extra parameter denoting where we
moved the ref to.
(execute_sm_exit): Re-issue sm_other stores in the correct
order.
(sm_seq_valid_bb): When always executed, allow sm_other to
prevail in between sm_ord and record their stored value.
(hoist_memory_references): Adjust refs_not_supported propagation
and prune sm_other from the end of the ordered sequences.
2020-05-11 Felix Yang <felix.yang@huawei.com>
PR target/94991

View File

@ -1,3 +1,12 @@
2020-05-11 Richard Biener <rguenther@suse.de>
PR tree-optimization/94988
PR tree-optimization/95025
* gcc.dg/torture/pr94988.c: New testcase.
* gcc.dg/torture/pr95025.c: Likewise.
* gcc.dg/torture/pr95045.c: Likewise.
* g++.dg/asan/pr95025.C: New testcase.
2020-05-11 Jakub Jelinek <jakub@redhat.com>
Tobias Burnus <tobias@codesourcery.com>

View File

@ -0,0 +1,28 @@
// { dg-do compile }
// { dg-options "-O2 -fsanitize=address" }
// Reduced compile-only testcase for PR tree-optimization/95025: this
// input made GCC ICE in the order-preserving store-motion code in
// tree-ssa-loop-im.c when compiled with -O2 and AddressSanitizer
// instrumentation (not-supported refs propagation left stray refs in
// the store sequence).  Only the shape of the code matters, not its
// runtime behavior - do not "clean it up", or it may stop reproducing.
struct a {
int b;
} * c;
// Singly linked list node.
struct d {
d *e;
};
// Iteration state: a list cursor plus a done flag.
struct f {
bool done;
d *g;
};
int h;
// Advance the cursor j->g when non-null; otherwise set j->done.
int i(f *j) {
if (j->g) {
j->g = j->g->e;
return h;
}
j->done = true;
return 0;
}
// Store through the global c; together with the loop in l() this forms
// the store sequence that triggered the ICE.
void k(bool j) { c->b = j; }
void l() {
// NOTE: 'a' is used uninitialized below - intentional in the reduced
// testcase, which is compile-only.
f a;
for (; !(&a)->done; i(&a))
k(true);
}

View File

@ -0,0 +1,20 @@
/* { dg-do run } */
/* Runtime testcase for PR tree-optimization/94988: store motion of 'b'
   must preserve the order of the store done through *b relative to the
   final update of 'b' itself.  Here 'b' is made to point at its own
   storage, so the two stores alias and any reordering is observable.  */
short *b;
/* noipa keeps bar from being inlined or IPA-optimized, so the loop
   optimizers actually see this loop.  */
void __attribute__((noipa))
bar (short x, int j)
{
for (int i = 0; i < j; ++i)
*b++ = x;
}
int
main()
{
/* Make b point at itself: the store through *b then writes into b's
   own bytes.  */
b = (short *)&b;
bar (0, 1);
/* If store motion re-orders the update of b relative to the store done
   through it, the low 16 bits of b end up holding pointer bits instead
   of the stored 0 and we abort.  (Assumes the *b store lands in the
   bits the cast extracts - little-endian layout; TODO confirm the test
   is meaningful on big-endian targets.)  */
if ((short)(__UINTPTR_TYPE__)b != 0)
__builtin_abort ();
return 0;
}

View File

@ -0,0 +1,13 @@
/* { dg-do compile } */
/* Reduced compile-only testcase for PR tree-optimization/95025: this
   loop nest made GCC ICE in the order-preserving store-motion code
   (stray refs left in the ordered store sequence).  Only successful
   compilation matters; keep the shape of the code as-is.  */
static int a;
short b;
int *c;
/* The inner loop stores through *c and to b while the outer loop
   updates a; the early return makes the inner stores conditionally
   executed on the outer loop's exit.  */
void d() {
for (;; a -= 1)
for (; b; b += 1) {
*c ^= 5;
if (a)
return;
}
}

View File

@ -0,0 +1,29 @@
/* { dg-do run } */
/* Runtime testcase for PR tree-optimization/95045: verifies that loop
   store optimizations preserve the order of the stores to e[].  Each
   b-loop iteration first zeroes e[2]; the c-loop then stores 4 into
   e[c + 1] (i.e. e[1] and e[2]), so e[2] must read 4 on exit.  A
   miscompiled store ordering would leave a stale value behind.  */
int a, c, f;
long b;
char d;
int e[3];
int g[9][3][2];
int main()
{
{
/* a is never written, so the goto below is never taken at run time;
   the label only shapes the control flow graph.  */
h:
for (f = 0; f <= 5; f++) {
b = 3;
for (; b >= 0; b--) {
/* Zero e[2] before the d/c-loops overwrite it.  */
e[2] = d = 0;
for (; d <= 3; d++) {
g[8][2][0] = e[1] = c = 0;
for (; c <= 1; c++)
e[c + 1] = g[d + 5][2][c] = 4;
}
if (a)
goto h;
}
}
}
/* The last store into e[2] in source order is the 4 from the c-loop;
   abort if an optimization reordered or dropped it.  */
if (e[2] != 4)
__builtin_abort ();
return 0;
}

View File

@ -2209,7 +2209,14 @@ execute_sm (class loop *loop, im_mem_ref *ref,
able to execute in arbitrary order with respect to other stores
sm_other is used for stores we do not try to apply store motion to. */
enum sm_kind { sm_ord, sm_unord, sm_other };
typedef std::pair<unsigned, sm_kind> seq_entry;
struct seq_entry
{
seq_entry (unsigned f, sm_kind k, tree fr = NULL)
: first (f), second (k), from (fr) {}
unsigned first;
sm_kind second;
tree from;
};
static void
execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
@ -2218,35 +2225,54 @@ execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
/* Sink the stores to exit from the loop. */
for (unsigned i = seq.length (); i > 0; --i)
{
if (seq[i-1].second != kind)
continue;
im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
sm_aux *aux = *aux_map.get (ref);
if (!aux->store_flag)
if (seq[i-1].second == sm_other)
{
gassign *store;
store = gimple_build_assign (unshare_expr (ref->mem.ref),
aux->tmp_var);
gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Re-issueing dependent store of ");
print_generic_expr (dump_file, ref->mem.ref);
fprintf (dump_file, " from loop %d on exit %d -> %d\n",
loop->num, ex->src->index, ex->dest->index);
}
gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
seq[i-1].from);
gsi_insert_on_edge (ex, store);
}
else
execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var, aux->store_flag,
loop_preheader_edge (loop), &aux->flag_bbs);
{
sm_aux *aux = *aux_map.get (ref);
if (!aux->store_flag)
{
gassign *store;
store = gimple_build_assign (unshare_expr (ref->mem.ref),
aux->tmp_var);
gsi_insert_on_edge (ex, store);
}
else
execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
aux->store_flag,
loop_preheader_edge (loop), &aux->flag_bbs);
}
}
}
/* Push the SM candidate at index PTR in the sequence SEQ down until
we hit the next SM candidate. Return true if that went OK and
false if we could not disambiguate agains another unrelated ref. */
false if we could not disambiguate agains another unrelated ref.
Update *AT to the index where the candidate now resides. */
static bool
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr)
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
{
*at = ptr;
for (; ptr > 0; --ptr)
{
seq_entry &new_cand = seq[ptr];
seq_entry &against = seq[ptr-1];
if (against.second == sm_ord)
if (against.second == sm_ord
|| (against.second == sm_other && against.from != NULL_TREE))
/* Found the tail of the sequence. */
break;
if (!refs_independent_p (memory_accesses.refs_list[new_cand.first],
@ -2255,6 +2281,7 @@ sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr)
/* ??? Prune new_cand from the list of refs to apply SM to. */
return false;
std::swap (new_cand, against);
*at = ptr - 1;
}
return true;
}
@ -2367,37 +2394,41 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
not order-preserving SM code. */
if (first_edge_seq[i].first != edge_seq[i].first)
{
bitmap_set_bit (refs_not_supported,
first_edge_seq[i].first);
bitmap_set_bit (refs_not_supported, edge_seq[i].first);
first_edge_seq[i].second = sm_unord;
if (first_edge_seq[i].second == sm_ord)
bitmap_set_bit (refs_not_supported,
first_edge_seq[i].first);
if (edge_seq[i].second == sm_ord)
bitmap_set_bit (refs_not_supported, edge_seq[i].first);
first_edge_seq[i].second = sm_other;
}
/* sm_unord prevails. */
/* sm_other prevails. */
else if (first_edge_seq[i].second != edge_seq[i].second)
{
/* This is just an optimization. */
gcc_assert (bitmap_bit_p (refs_not_supported,
first_edge_seq[i].first));
first_edge_seq[i].second = sm_unord;
first_edge_seq[i].second = sm_other;
}
}
/* Any excess elements become sm_unord since they are now
/* Any excess elements become sm_other since they are now
coonditionally executed. */
if (first_edge_seq.length () > edge_seq.length ())
{
for (unsigned i = edge_seq.length ();
i < first_edge_seq.length (); ++i)
{
bitmap_set_bit (refs_not_supported,
first_edge_seq[i].first);
first_edge_seq[i].second = sm_unord;
if (first_edge_seq[i].second == sm_ord)
bitmap_set_bit (refs_not_supported,
first_edge_seq[i].first);
first_edge_seq[i].second = sm_other;
}
}
else if (edge_seq.length () > first_edge_seq.length ())
{
for (unsigned i = first_edge_seq.length ();
i < edge_seq.length (); ++i)
bitmap_set_bit (refs_not_supported, edge_seq[i].first);
if (edge_seq[i].second == sm_ord)
bitmap_set_bit (refs_not_supported, edge_seq[i].first);
}
}
/* Use the sequence from the first edge and push SMs down. */
@ -2407,17 +2438,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
break;
unsigned id = first_edge_seq[i].first;
seq.safe_push (first_edge_seq[i]);
unsigned new_idx;
if (first_edge_seq[i].second == sm_ord
&& !sm_seq_push_down (seq, seq.length () - 1))
&& !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
{
bitmap_set_bit (refs_not_supported, id);
/* ??? Mark it sm_unord but it's now "somewhere" ... */
for (unsigned i = seq.length (); i != 0; --i)
if (seq[i - 1].first == id)
{
seq[i - 1].second = sm_unord;
break;
}
/* Mark it sm_other. */
seq[new_idx].second = sm_other;
}
}
return 1;
@ -2429,21 +2456,21 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
/* One of the stores we want to apply SM to and we've not yet seen. */
else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
{
seq.safe_push (std::make_pair (data->ref, sm_ord));
seq.safe_push (seq_entry (data->ref, sm_ord));
/* 1) push it down the queue until a SMed
and not ignored ref is reached, skipping all not SMed refs
and ignored refs via non-TBAA disambiguation. */
if (!sm_seq_push_down (seq, seq.length () - 1))
unsigned new_idx;
if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
/* If that fails but we did not fork yet continue, we'll see
to re-materialize all of the stores in the sequence then.
Further stores will only be pushed up to this one. */
&& forked)
{
bitmap_set_bit (refs_not_supported, data->ref);
/* ??? Mark it sm_unord but it's now "somewhere" ... */
for (unsigned i = seq.length (); i != 0; --i)
if (seq[i - 1].first == data->ref)
{
seq[i - 1].second = sm_unord;
break;
}
/* Mark it sm_other. */
seq[new_idx].second = sm_other;
}
/* 2) check whether we've seen all refs we want to SM and if so
@ -2453,7 +2480,8 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
}
else
/* Another store not part of the final sequence. Simply push it. */
seq.safe_push (std::make_pair (data->ref, sm_other));
seq.safe_push (seq_entry (data->ref, sm_other,
gimple_assign_rhs1 (def)));
vdef = gimple_vuse (def);
}
@ -2513,21 +2541,72 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
std::pair<edge, vec<seq_entry> > *seq;
FOR_EACH_VEC_ELT (sms, i, seq)
{
bool need_to_push = false;
for (unsigned i = 0; i < seq->second.length (); ++i)
{
if (seq->second[i].second == sm_other)
sm_kind kind = seq->second[i].second;
if (kind == sm_other && seq->second[i].from == NULL_TREE)
break;
unsigned id = seq->second[i].first;
if (bitmap_bit_p (refs_not_supported, id))
seq->second[i].second = sm_other;
else if (!sm_seq_push_down (seq->second, i))
unsigned new_idx;
if (kind == sm_ord
&& bitmap_bit_p (refs_not_supported, id))
{
if (bitmap_set_bit (refs_not_supported, id))
changed = true;
seq->second[i].second = sm_other;
gcc_assert (seq->second[i].from == NULL_TREE);
need_to_push = true;
}
else if (need_to_push
&& !sm_seq_push_down (seq->second, i, &new_idx))
{
/* We need to push down both sm_ord and sm_other
but for the latter we need to disqualify all
following refs. */
if (kind == sm_ord)
{
if (bitmap_set_bit (refs_not_supported, id))
changed = true;
seq->second[new_idx].second = sm_other;
}
else
{
for (unsigned j = seq->second.length () - 1;
j > new_idx; --j)
if (seq->second[j].second == sm_ord
&& bitmap_set_bit (refs_not_supported,
seq->second[j].first))
changed = true;
seq->second.truncate (new_idx);
break;
}
}
}
}
}
std::pair<edge, vec<seq_entry> > *seq;
FOR_EACH_VEC_ELT (sms, i, seq)
{
/* Prune sm_other from the end. */
while (!seq->second.is_empty ()
&& seq->second.last ().second == sm_other)
seq->second.pop ();
/* Prune duplicates from the start. */
auto_bitmap seen (&lim_bitmap_obstack);
unsigned j, k;
for (j = k = 0; j < seq->second.length (); ++j)
if (bitmap_set_bit (seen, seq->second[j].first))
{
if (k != j)
seq->second[k] = seq->second[j];
++k;
}
seq->second.truncate (k);
/* And verify. */
seq_entry *e;
FOR_EACH_VEC_ELT (seq->second, j, e)
gcc_assert (e->second == sm_ord
|| (e->second == sm_other && e->from != NULL_TREE));
}
/* Verify dependence for refs we cannot handle with the order preserving
code (refs_not_supported) or prune them from mem_refs. */
@ -2540,7 +2619,7 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
/* We've now verified store order for ref with respect to all other
stores in the loop does not matter. */
else
unord_refs.safe_push (std::make_pair (i, sm_unord));
unord_refs.safe_push (seq_entry (i, sm_unord));
}
hash_map<im_mem_ref *, sm_aux *> aux_map;