tree-optimization/102436 - restore loop store motion

This restores a case of conditional store motion that we fail to handle
after the rewrite.  We can recognize the special case of all
stores in a loop happening in a single conditionally executed
block, which ensures the stores are not re-ordered with respect
to each other by executing them in different loop iterations.
Separating out this case avoids complicating the already complex
main path.
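
Concretely, the special case enables a transform of the following
shape (a C-level sketch only, not the emitted GIMPLE; the names
p_lsm, p_flag, before and after are invented for illustration):

unsigned p;

/* Before: the only store to p in the loop happens in a single
   conditionally executed block.  */
void before (void)
{
  for (int i = 0; i < 256; ++i)
    if (p)
      p = p + 1;
}

/* After store motion: p is kept in a local, a flag records whether
   the conditional block ever executed, and the store is materialized
   once on the loop exit, guarded by that flag (with
   -fstore-data-races the exit store may be emitted unconditionally).  */
void after (void)
{
  unsigned p_lsm = p;
  _Bool p_flag = 0;
  for (int i = 0; i < 256; ++i)
    if (p_lsm)
      {
        p_lsm = p_lsm + 1;
        p_flag = 1;
      }
  if (p_flag)
    p = p_lsm;
}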

2021-11-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/102436
	* tree-ssa-loop-im.c (execute_sm_if_changed): Add mode
	to just create the if structure and return the then block.
	(execute_sm): Add flag to indicate the var will re-use
	another flag var.
	(hoist_memory_references): Support a single conditional
	block with all stores as special case.

	* gcc.dg/torture/20211118-1.c: New testcase.
	* gcc.dg/tree-ssa/ssa-lim-18.c: Likewise.
commit 0fc859f5ef (parent 09d462146b)
Richard Biener <rguenther@suse.de>, 2021-11-18 13:40:32 +01:00
3 changed files with 199 additions and 9 deletions

gcc/testsuite/gcc.dg/torture/20211118-1.c (new file)

@@ -0,0 +1,27 @@
/* { dg-do run } */
unsigned p[3];
void __attribute__((noipa))
bar (float *q, int n, int m)
{
for (int i = 0; i < m; ++i)
{
if (i == n)
{
unsigned a = p[1];
p[1] = a + 1;
*q = 1.;
}
q++;
}
}
int main()
{
#if __SIZEOF_FLOAT__ == __SIZEOF_INT__
bar ((float *)p, 1, 3);
if (((float *)p)[1] != 1.)
__builtin_abort ();
#endif
return 0;
}

gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c (new file)

@@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O -fstrict-aliasing -fdump-tree-lim2-details" } */
unsigned p;
void foo (float *q)
{
for (int i = 0; i < 256; ++i)
{
if (p)
{
unsigned a = p;
*(q++) = 1.;
p = a + 1;
}
}
}
/* { dg-final { scan-tree-dump-times "Executing store motion" 1 "lim2" } } */
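
Here the special case is what makes store motion of p possible again:
the load and the store of p both sit in the single conditionally
executed block, while the stores through q cannot be moved since the
address changes each iteration (and with -fstrict-aliasing the float
stores do not alias the unsigned p).  A hypothetical sketch of the
loop after lim2, with invented names (foo_after, p_lsm, p_flag):

extern unsigned p;

void foo_after (float *q)
{
  unsigned p_lsm = p;
  _Bool p_flag = 0;
  for (int i = 0; i < 256; ++i)
    if (p_lsm)
      {
        unsigned a = p_lsm;
        *(q++) = 1.;
        p_lsm = a + 1;
        p_flag = 1;
      }
  if (p_flag)
    p = p_lsm;
}

This single exit materialization is the one "Executing store motion"
the scan above expects.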

gcc/tree-ssa-loop-im.c

@@ -1911,10 +1911,13 @@ first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
}
}
if (lsm_flag) <--
-     MEM = lsm;  <--
+     MEM = lsm;  <-- (X)
+   In case MEM and TMP_VAR are NULL the function will return the then
+   block so the caller can insert (X) and other related stmts.
*/
-static void
+static basic_block
execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
edge preheader, hash_set <basic_block> *flag_bbs,
edge &append_cond_position, edge &last_cond_fallthru)
@@ -2009,10 +2012,13 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
NULL_TREE, NULL_TREE);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-  gsi = gsi_start_bb (then_bb);
   /* Insert actual store.  */
-  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
-  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (mem)
+    {
+      gsi = gsi_start_bb (then_bb);
+      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+    }
edge e1 = single_succ_edge (new_bb);
edge e2 = make_edge (new_bb, then_bb,
@@ -2060,6 +2066,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
update_stmt (phi);
}
}
return then_bb;
}
/* When REF is set on the location, set flag indicating the store. */
@@ -2117,7 +2125,8 @@ struct sm_aux
static void
execute_sm (class loop *loop, im_mem_ref *ref,
-            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
+            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
+            bool use_other_flag_var)
{
gassign *load;
struct fmt_data fmt_data;
@@ -2146,7 +2155,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
|| (! flag_store_data_races && ! always_stored)))
multi_threaded_model_p = true;
-  if (multi_threaded_model_p)
+  if (multi_threaded_model_p && !use_other_flag_var)
aux->store_flag
= execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
else
@@ -2182,7 +2191,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
lim_data->tgt_loop = loop;
gsi_insert_before (&gsi, load, GSI_SAME_STMT);
-  if (multi_threaded_model_p)
+  if (aux->store_flag)
{
load = gimple_build_assign (aux->store_flag, boolean_false_node);
lim_data = init_lim_data (load);
@@ -2555,6 +2564,140 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
unsigned i;
bitmap_iterator bi;
/* There's a special case we can use ordered re-materialization for
conditionally executed stores which is when all stores in the loop
happen in the same basic-block. In that case we know we'll reach
all stores and thus can simply process that BB and emit a single
conditional block of ordered materializations. See PR102436. */
basic_block single_store_bb = NULL;
EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
0, i, bi)
{
bool fail = false;
ref = memory_accesses.refs_list[i];
for (auto loc : ref->accesses_in_loop)
if (!gimple_vdef (loc.stmt))
;
else if (!single_store_bb)
{
single_store_bb = gimple_bb (loc.stmt);
bool conditional = false;
for (edge e : exits)
if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
{
/* Conditional as seen from e. */
conditional = true;
break;
}
if (!conditional)
{
fail = true;
break;
}
}
else if (single_store_bb != gimple_bb (loc.stmt))
{
fail = true;
break;
}
if (fail)
{
single_store_bb = NULL;
break;
}
}
if (single_store_bb)
{
/* Analyze the single block with stores. */
auto_bitmap fully_visited;
auto_bitmap refs_not_supported;
auto_bitmap refs_not_in_seq;
auto_vec<seq_entry> seq;
bitmap_copy (refs_not_in_seq, mem_refs);
int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
seq, refs_not_in_seq, refs_not_supported,
false, fully_visited);
if (res != 1)
{
/* Unhandled refs can still fail this. */
bitmap_clear (mem_refs);
return;
}
/* We cannot handle sm_other since we neither remember the
stored location nor the value at the point we execute them. */
for (unsigned i = 0; i < seq.length (); ++i)
{
unsigned new_i;
if (seq[i].second == sm_other
&& seq[i].from != NULL_TREE)
seq[i].from = NULL_TREE;
else if ((seq[i].second == sm_ord
|| (seq[i].second == sm_other
&& seq[i].from != NULL_TREE))
&& !sm_seq_push_down (seq, i, &new_i))
{
bitmap_set_bit (refs_not_supported, seq[new_i].first);
seq[new_i].second = sm_other;
seq[new_i].from = NULL_TREE;
}
}
bitmap_and_compl_into (mem_refs, refs_not_supported);
if (bitmap_empty_p (mem_refs))
return;
/* Prune seq. */
while (seq.last ().second == sm_other
&& seq.last ().from == NULL_TREE)
seq.pop ();
hash_map<im_mem_ref *, sm_aux *> aux_map;
/* Execute SM but delay the store materialization for ordered
sequences on exit. */
bool first_p = true;
EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
{
ref = memory_accesses.refs_list[i];
execute_sm (loop, ref, aux_map, true, !first_p);
first_p = false;
}
/* Get at the single flag variable we eventually produced. */
im_mem_ref *ref
= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
sm_aux *aux = *aux_map.get (ref);
/* Materialize ordered store sequences on exits. */
edge e;
FOR_EACH_VEC_ELT (exits, i, e)
{
edge append_cond_position = NULL;
edge last_cond_fallthru = NULL;
edge insert_e = e;
/* Construct the single flag variable control flow and insert
the ordered seq of stores in the then block. With
-fstore-data-races we can do the stores unconditionally. */
if (aux->store_flag)
insert_e
= single_pred_edge
(execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
aux->store_flag,
loop_preheader_edge (loop),
&aux->flag_bbs, append_cond_position,
last_cond_fallthru));
execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
append_cond_position, last_cond_fallthru);
gsi_commit_one_edge_insert (insert_e, NULL);
}
for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
iter != aux_map.end (); ++iter)
delete (*iter).second;
return;
}
/* To address PR57359 before actually applying store-motion check
the candidates found for validity with regards to reordering
relative to other stores which we until here disambiguated using
@@ -2693,7 +2836,8 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
{
ref = memory_accesses.refs_list[i];
-      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
+      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
+                  false);
}
/* Materialize ordered store sequences on exits. */