tree-optimization/102436 - restore loop store motion
This restores a case of conditional store motion we fail to handle
after the rewrite.  We can recognize the special case of all stores
in a loop happening in a single conditionally executed block which
ensures stores are not re-ordered by executing them in different
loop iterations.  Separating out the case avoids complicating the
already complex main path.

2021-11-18  Richard Biener  <rguenther@suse.de>

        PR tree-optimization/102436
        * tree-ssa-loop-im.c (execute_sm_if_changed): Add mode to just
        create the if structure and return the then block.
        (execute_sm): Add flag to indicate the var will re-use another
        flag var.
        (hoist_memory_references): Support a single conditional block
        with all stores as special case.

        * gcc.dg/torture/20211118-1.c: New testcase.
        * gcc.dg/tree-ssa/ssa-lim-18.c: Likewise.
commit 0fc859f5ef (parent 09d462146b)
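To make the transformation concrete, below is a hand-written sketch of what store motion with a store flag does to the loop added in gcc.dg/tree-ssa/ssa-lim-18.c further down. It is illustration only, not compiler output: the names foo_after_sm, p_lsm and p_stored are invented, and the real pass works on GIMPLE. The load of p is hoisted to the preheader, the store in the loop becomes an update of the register copy plus a flag, and the store to memory is materialized after the loop only when the flag is set.

/* Illustration only: roughly the effect of store motion with a store
   flag on the ssa-lim-18.c loop; the actual GIMPLE differs in detail.  */
unsigned p;

void foo_after_sm (float *q)
{
  unsigned p_lsm = p;      /* load hoisted to the loop preheader */
  _Bool p_stored = 0;      /* store flag, cleared before the loop */
  for (int i = 0; i < 256; ++i)
    if (p_lsm)
      {
        unsigned a = p_lsm;
        *(q++) = 1.;
        p_lsm = a + 1;     /* store becomes an update of the register copy */
        p_stored = 1;      /* remember that a store happened */
      }
  if (p_stored)            /* store materialized on the exit, guarded by the flag */
    p = p_lsm;
}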
gcc/testsuite/gcc.dg/torture/20211118-1.c (new file, 27 lines)
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+unsigned p[3];
+void __attribute__((noipa))
+bar (float *q, int n, int m)
+{
+  for (int i = 0; i < m; ++i)
+    {
+      if (i == n)
+        {
+          unsigned a = p[1];
+          p[1] = a + 1;
+          *q = 1.;
+        }
+      q++;
+    }
+}
+
+int main()
+{
+#if __SIZEOF_FLOAT__ == __SIZEOF_INT__
+  bar ((float *)p, 1, 3);
+  if (((float *)p)[1] != 1.)
+    __builtin_abort ();
+#endif
+  return 0;
+}
gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c (new file, 19 lines)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fstrict-aliasing -fdump-tree-lim2-details" } */
+
+unsigned p;
+
+void foo (float *q)
+{
+  for (int i = 0; i < 256; ++i)
+    {
+      if (p)
+        {
+          unsigned a = p;
+          *(q++) = 1.;
+          p = a + 1;
+        }
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Executing store motion" 1 "lim2" } } */
gcc/tree-ssa-loop-im.c (modified)
@@ -1911,10 +1911,13 @@ first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
        }
      }
      if (lsm_flag)      <--
-       MEM = lsm;       <--
+       MEM = lsm;       <-- (X)
+
+   In case MEM and TMP_VAR are NULL the function will return the then
+   block so the caller can insert (X) and other related stmts.
 */
 
-static void
+static basic_block
 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
                        edge preheader, hash_set <basic_block> *flag_bbs,
                        edge &append_cond_position, edge &last_cond_fallthru)
@@ -2009,10 +2012,13 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
                             NULL_TREE, NULL_TREE);
   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
-  gsi = gsi_start_bb (then_bb);
   /* Insert actual store.  */
-  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
-  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (mem)
+    {
+      gsi = gsi_start_bb (then_bb);
+      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+    }
 
   edge e1 = single_succ_edge (new_bb);
   edge e2 = make_edge (new_bb, then_bb,
@@ -2060,6 +2066,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
           update_stmt (phi);
         }
     }
+
+  return then_bb;
 }
 
 /* When REF is set on the location, set flag indicating the store.  */
@@ -2117,7 +2125,8 @@ struct sm_aux
 
 static void
 execute_sm (class loop *loop, im_mem_ref *ref,
-            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
+            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
+            bool use_other_flag_var)
 {
   gassign *load;
   struct fmt_data fmt_data;
@@ -2146,7 +2155,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
           || (! flag_store_data_races && ! always_stored)))
     multi_threaded_model_p = true;
 
-  if (multi_threaded_model_p)
+  if (multi_threaded_model_p && !use_other_flag_var)
     aux->store_flag
       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
   else
@@ -2182,7 +2191,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   lim_data->tgt_loop = loop;
   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
 
-  if (multi_threaded_model_p)
+  if (aux->store_flag)
     {
       load = gimple_build_assign (aux->store_flag, boolean_false_node);
       lim_data = init_lim_data (load);
@@ -2555,6 +2564,140 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   unsigned i;
   bitmap_iterator bi;
 
+  /* There's a special case we can use ordered re-materialization for
+     conditionally excuted stores which is when all stores in the loop
+     happen in the same basic-block.  In that case we know we'll reach
+     all stores and thus can simply process that BB and emit a single
+     conditional block of ordered materializations.  See PR102436.  */
+  basic_block single_store_bb = NULL;
+  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
+                            0, i, bi)
+    {
+      bool fail = false;
+      ref = memory_accesses.refs_list[i];
+      for (auto loc : ref->accesses_in_loop)
+        if (!gimple_vdef (loc.stmt))
+          ;
+        else if (!single_store_bb)
+          {
+            single_store_bb = gimple_bb (loc.stmt);
+            bool conditional = false;
+            for (edge e : exits)
+              if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
+                {
+                  /* Conditional as seen from e.  */
+                  conditional = true;
+                  break;
+                }
+            if (!conditional)
+              {
+                fail = true;
+                break;
+              }
+          }
+        else if (single_store_bb != gimple_bb (loc.stmt))
+          {
+            fail = true;
+            break;
+          }
+      if (fail)
+        {
+          single_store_bb = NULL;
+          break;
+        }
+    }
+  if (single_store_bb)
+    {
+      /* Analyze the single block with stores.  */
+      auto_bitmap fully_visited;
+      auto_bitmap refs_not_supported;
+      auto_bitmap refs_not_in_seq;
+      auto_vec<seq_entry> seq;
+      bitmap_copy (refs_not_in_seq, mem_refs);
+      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
+                                 seq, refs_not_in_seq, refs_not_supported,
+                                 false, fully_visited);
+      if (res != 1)
+        {
+          /* Unhandled refs can still fail this.  */
+          bitmap_clear (mem_refs);
+          return;
+        }
+
+      /* We cannot handle sm_other since we neither remember the
+         stored location nor the value at the point we execute them.  */
+      for (unsigned i = 0; i < seq.length (); ++i)
+        {
+          unsigned new_i;
+          if (seq[i].second == sm_other
+              && seq[i].from != NULL_TREE)
+            seq[i].from = NULL_TREE;
+          else if ((seq[i].second == sm_ord
+                    || (seq[i].second == sm_other
+                        && seq[i].from != NULL_TREE))
+                   && !sm_seq_push_down (seq, i, &new_i))
+            {
+              bitmap_set_bit (refs_not_supported, seq[new_i].first);
+              seq[new_i].second = sm_other;
+              seq[new_i].from = NULL_TREE;
+            }
+        }
+      bitmap_and_compl_into (mem_refs, refs_not_supported);
+      if (bitmap_empty_p (mem_refs))
+        return;
+
+      /* Prune seq.  */
+      while (seq.last ().second == sm_other
+             && seq.last ().from == NULL_TREE)
+        seq.pop ();
+
+      hash_map<im_mem_ref *, sm_aux *> aux_map;
+
+      /* Execute SM but delay the store materialization for ordered
+         sequences on exit.  */
+      bool first_p = true;
+      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
+        {
+          ref = memory_accesses.refs_list[i];
+          execute_sm (loop, ref, aux_map, true, !first_p);
+          first_p = false;
+        }
+
+      /* Get at the single flag variable we eventually produced.  */
+      im_mem_ref *ref
+        = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
+      sm_aux *aux = *aux_map.get (ref);
+
+      /* Materialize ordered store sequences on exits.  */
+      edge e;
+      FOR_EACH_VEC_ELT (exits, i, e)
+        {
+          edge append_cond_position = NULL;
+          edge last_cond_fallthru = NULL;
+          edge insert_e = e;
+          /* Construct the single flag variable control flow and insert
+             the ordered seq of stores in the then block.  With
+             -fstore-data-races we can do the stores unconditionally.  */
+          if (aux->store_flag)
+            insert_e
+              = single_pred_edge
+                  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
+                                          aux->store_flag,
+                                          loop_preheader_edge (loop),
+                                          &aux->flag_bbs, append_cond_position,
+                                          last_cond_fallthru));
+          execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
+                           append_cond_position, last_cond_fallthru);
+          gsi_commit_one_edge_insert (insert_e, NULL);
+        }
+
+      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
+           iter != aux_map.end (); ++iter)
+        delete (*iter).second;
+
+      return;
+    }
+
   /* To address PR57359 before actually applying store-motion check
      the candidates found for validity with regards to reordering
      relative to other stores which we until here disambiguated using
@@ -2693,7 +2836,8 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
     {
       ref = memory_accesses.refs_list[i];
-      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
+      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
+                  false);
     }
 
   /* Materialize ordered store sequences on exits.  */
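For contrast, here is a hand-written example (not part of the commit or the testsuite) of a loop the new special case deliberately rejects: its stores sit in two different conditionally executed blocks, so the single_store_bb check above finds a second store block, sets fail, and the code falls through to the pre-existing handling.

/* Illustration only: stores live in two distinct conditional blocks,
   so single_store_bb differs between them and the special case bails out.  */
unsigned p, r;

void baz (int n)
{
  for (int i = 0; i < n; ++i)
    {
      if (i & 1)
        p = p + 1;   /* store in one conditionally executed block */
      else
        r = r + 1;   /* store in a different block */
    }
}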