backport: re PR tree-optimization/82726 (ICE in verify_ssa during GIMPLE pass: pcom)
Backport from mainline 2017-11-15 Bin Cheng <bin.cheng@arm.com> PR tree-optimization/82726 PR tree-optimization/70754 * tree-predcom.c (order_drefs_by_pos): New function. (combine_chains): Move code setting has_max_use_after to... (try_combine_chains): ...here. New parameter. Sort combined chains according to position information. (tree_predictive_commoning_loop): Update call to above function. (update_pos_for_combined_chains, pcom_stmt_dominates_stmt_p): New. 2017-11-15 Bin Cheng <bin.cheng@arm.com> PR tree-optimization/82726 Revert 2017-01-23 Bin Cheng <bin.cheng@arm.com> PR tree-optimization/70754 * tree-predcom.c (stmt_combining_refs): New parameter INSERT_BEFORE. (reassociate_to_the_same_stmt): New parameter INSERT_BEFORE. Insert combined stmt before it if not NULL. (combine_chains): Process refs reversely and compute dominance point for root ref. Revert 2017-02-23 Bin Cheng <bin.cheng@arm.com> PR tree-optimization/79663 * tree-predcom.c (combine_chains): Process refs in reverse order only for ZERO length chains, and add explaining comment. gcc/testsuite Backport from mainline 2017-11-15 Bin Cheng <bin.cheng@arm.com> PR tree-optimization/82726 * gcc.dg/tree-ssa/pr82726.c: New test. From-SVN: r255828
This commit is contained in:
parent
b1c275e2c5
commit
3c6ef044d3
|
@ -1,3 +1,37 @@
|
|||
2017-12-19 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
Backport from mainline
|
||||
2017-11-15 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/82726
|
||||
PR tree-optimization/70754
|
||||
* tree-predcom.c (order_drefs_by_pos): New function.
|
||||
(combine_chains): Move code setting has_max_use_after to...
|
||||
(try_combine_chains): ...here. New parameter. Sort combined chains
|
||||
according to position information.
|
||||
(tree_predictive_commoning_loop): Update call to above function.
|
||||
(update_pos_for_combined_chains, pcom_stmt_dominates_stmt_p): New.
|
||||
|
||||
2017-11-15 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/82726
|
||||
Revert
|
||||
2017-01-23 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/70754
|
||||
* tree-predcom.c (stmt_combining_refs): New parameter INSERT_BEFORE.
|
||||
(reassociate_to_the_same_stmt): New parameter INSERT_BEFORE. Insert
|
||||
combined stmt before it if not NULL.
|
||||
(combine_chains): Process refs reversely and compute dominance point
|
||||
for root ref.
|
||||
|
||||
Revert
|
||||
2017-02-23 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/79663
|
||||
* tree-predcom.c (combine_chains): Process refs in reverse order
|
||||
only for ZERO length chains, and add explaining comment.
|
||||
|
||||
2017-12-19 Sebastian Huber <sebastian.huber@embedded-brains.de>
|
||||
|
||||
Backport from mainline
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
2017-12-19 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
Backport from mainline
|
||||
2017-11-15 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/82726
|
||||
* gcc.dg/tree-ssa/pr82726.c: New test.
|
||||
|
||||
2017-12-19 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
Backported from mainline
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 --param tree-reassoc-width=4" } */
|
||||
/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */
|
||||
|
||||
#define N 40
|
||||
#define M 128
|
||||
unsigned int in[N+M];
|
||||
unsigned short out[N];
|
||||
|
||||
/* Outer-loop vectorization. */
|
||||
|
||||
void
|
||||
foo (){
|
||||
int i,j;
|
||||
unsigned int diff;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff = 0;
|
||||
for (j = 0; j < M; j+=8) {
|
||||
diff += in[j+i];
|
||||
}
|
||||
out[i]=(unsigned short)diff;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
|
@ -943,6 +943,17 @@ order_drefs (const void *a, const void *b)
|
|||
return (*da)->pos - (*db)->pos;
|
||||
}
|
||||
|
||||
/* Compares two drefs A and B by their position. Callback for qsort. */
|
||||
|
||||
static int
|
||||
order_drefs_by_pos (const void *a, const void *b)
|
||||
{
|
||||
const dref *const da = (const dref *) a;
|
||||
const dref *const db = (const dref *) b;
|
||||
|
||||
return (*da)->pos - (*db)->pos;
|
||||
}
|
||||
|
||||
/* Returns root of the CHAIN. */
|
||||
|
||||
static inline dref
|
||||
|
@ -2164,11 +2175,10 @@ remove_name_from_operation (gimple *stmt, tree op)
|
|||
}
|
||||
|
||||
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
|
||||
are combined in a single statement, and returns this statement. Note the
|
||||
statement is inserted before INSERT_BEFORE if it's not NULL. */
|
||||
are combined in a single statement, and returns this statement. */
|
||||
|
||||
static gimple *
|
||||
reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
|
||||
reassociate_to_the_same_stmt (tree name1, tree name2)
|
||||
{
|
||||
gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
|
||||
gassign *new_stmt, *tmp_stmt;
|
||||
|
@ -2225,12 +2235,6 @@ reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
|
|||
var = create_tmp_reg (type, "predreastmp");
|
||||
new_name = make_ssa_name (var);
|
||||
new_stmt = gimple_build_assign (new_name, code, name1, name2);
|
||||
if (insert_before && stmt_dominates_stmt_p (insert_before, s1))
|
||||
bsi = gsi_for_stmt (insert_before);
|
||||
else
|
||||
bsi = gsi_for_stmt (s1);
|
||||
|
||||
gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
|
||||
|
||||
var = create_tmp_reg (type, "predreastmp");
|
||||
tmp_name = make_ssa_name (var);
|
||||
|
@ -2247,6 +2251,7 @@ reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
|
|||
s1 = gsi_stmt (bsi);
|
||||
update_stmt (s1);
|
||||
|
||||
gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
|
||||
gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
|
||||
|
||||
return new_stmt;
|
||||
|
@ -2255,11 +2260,10 @@ reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
|
|||
/* Returns the statement that combines references R1 and R2. In case R1
|
||||
and R2 are not used in the same statement, but they are used with an
|
||||
associative and commutative operation in the same expression, reassociate
|
||||
the expression so that they are used in the same statement. The combined
|
||||
statement is inserted before INSERT_BEFORE if it's not NULL. */
|
||||
the expression so that they are used in the same statement. */
|
||||
|
||||
static gimple *
|
||||
stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
|
||||
stmt_combining_refs (dref r1, dref r2)
|
||||
{
|
||||
gimple *stmt1, *stmt2;
|
||||
tree name1 = name_for_ref (r1);
|
||||
|
@ -2270,7 +2274,7 @@ stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
|
|||
if (stmt1 == stmt2)
|
||||
return stmt1;
|
||||
|
||||
return reassociate_to_the_same_stmt (name1, name2, insert_before);
|
||||
return reassociate_to_the_same_stmt (name1, name2);
|
||||
}
|
||||
|
||||
/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
|
||||
|
@ -2283,8 +2287,7 @@ combine_chains (chain_p ch1, chain_p ch2)
|
|||
enum tree_code op = ERROR_MARK;
|
||||
bool swap = false;
|
||||
chain_p new_chain;
|
||||
int i, j, num;
|
||||
gimple *root_stmt;
|
||||
unsigned i;
|
||||
tree rslt_type = NULL_TREE;
|
||||
|
||||
if (ch1 == ch2)
|
||||
|
@ -2305,9 +2308,6 @@ combine_chains (chain_p ch1, chain_p ch2)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
ch1->combined = true;
|
||||
ch2->combined = true;
|
||||
|
||||
if (swap)
|
||||
std::swap (ch1, ch2);
|
||||
|
||||
|
@ -2319,69 +2319,65 @@ combine_chains (chain_p ch1, chain_p ch2)
|
|||
new_chain->rslt_type = rslt_type;
|
||||
new_chain->length = ch1->length;
|
||||
|
||||
gimple *insert = NULL;
|
||||
num = ch1->refs.length ();
|
||||
i = (new_chain->length == 0) ? num - 1 : 0;
|
||||
j = (new_chain->length == 0) ? -1 : 1;
|
||||
/* For ZERO length chain, process refs in reverse order so that dominant
|
||||
position is ready when it comes to the root ref.
|
||||
For non-ZERO length chain, process refs in order. See PR79663. */
|
||||
for (; num > 0; num--, i += j)
|
||||
for (i = 0; (ch1->refs.iterate (i, &r1)
|
||||
&& ch2->refs.iterate (i, &r2)); i++)
|
||||
{
|
||||
r1 = ch1->refs[i];
|
||||
r2 = ch2->refs[i];
|
||||
nw = XCNEW (struct dref_d);
|
||||
nw->stmt = stmt_combining_refs (r1, r2);
|
||||
nw->distance = r1->distance;
|
||||
|
||||
/* For ZERO length chain, insert combined stmt of root ref at dominant
|
||||
position. */
|
||||
nw->stmt = stmt_combining_refs (r1, r2, i == 0 ? insert : NULL);
|
||||
/* For ZERO length chain, record dominant position where combined stmt
|
||||
of root ref should be inserted. In this case, though all root refs
|
||||
dominate following ones, it's possible that combined stmt doesn't.
|
||||
See PR70754. */
|
||||
if (new_chain->length == 0
|
||||
&& (insert == NULL || stmt_dominates_stmt_p (nw->stmt, insert)))
|
||||
insert = nw->stmt;
|
||||
|
||||
new_chain->refs.safe_push (nw);
|
||||
}
|
||||
if (new_chain->length == 0)
|
||||
{
|
||||
/* Restore the order for ZERO length chain's refs. */
|
||||
num = new_chain->refs.length () >> 1;
|
||||
for (i = 0, j = new_chain->refs.length () - 1; i < num; i++, j--)
|
||||
std::swap (new_chain->refs[i], new_chain->refs[j]);
|
||||
|
||||
/* For ZERO length chain, has_max_use_after must be true since root
|
||||
combined stmt must dominates others. */
|
||||
new_chain->has_max_use_after = true;
|
||||
return new_chain;
|
||||
}
|
||||
|
||||
new_chain->has_max_use_after = false;
|
||||
root_stmt = get_chain_root (new_chain)->stmt;
|
||||
for (i = 1; new_chain->refs.iterate (i, &nw); i++)
|
||||
{
|
||||
if (nw->distance == new_chain->length
|
||||
&& !stmt_dominates_stmt_p (nw->stmt, root_stmt))
|
||||
{
|
||||
new_chain->has_max_use_after = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ch1->combined = true;
|
||||
ch2->combined = true;
|
||||
return new_chain;
|
||||
}
|
||||
|
||||
/* Try to combine the CHAINS. */
|
||||
/* Recursively update position information of all offspring chains to ROOT
|
||||
chain's position information. */
|
||||
|
||||
static void
|
||||
try_combine_chains (vec<chain_p> *chains)
|
||||
update_pos_for_combined_chains (chain_p root)
|
||||
{
|
||||
chain_p ch1 = root->ch1, ch2 = root->ch2;
|
||||
dref ref, ref1, ref2;
|
||||
for (unsigned j = 0; (root->refs.iterate (j, &ref)
|
||||
&& ch1->refs.iterate (j, &ref1)
|
||||
&& ch2->refs.iterate (j, &ref2)); ++j)
|
||||
ref1->pos = ref2->pos = ref->pos;
|
||||
|
||||
if (ch1->type == CT_COMBINATION)
|
||||
update_pos_for_combined_chains (ch1);
|
||||
if (ch2->type == CT_COMBINATION)
|
||||
update_pos_for_combined_chains (ch2);
|
||||
}
|
||||
|
||||
/* Returns true if statement S1 dominates statement S2. */
|
||||
|
||||
static bool
|
||||
pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
|
||||
{
|
||||
basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
|
||||
|
||||
if (!bb1 || s1 == s2)
|
||||
return true;
|
||||
|
||||
if (bb1 == bb2)
|
||||
return gimple_uid (s1) < gimple_uid (s2);
|
||||
|
||||
return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
|
||||
}
|
||||
|
||||
/* Try to combine the CHAINS in LOOP. */
|
||||
|
||||
static void
|
||||
try_combine_chains (struct loop *loop, vec<chain_p> *chains)
|
||||
{
|
||||
unsigned i, j;
|
||||
chain_p ch1, ch2, cch;
|
||||
auto_vec<chain_p> worklist;
|
||||
bool combined_p = false;
|
||||
|
||||
FOR_EACH_VEC_ELT (*chains, i, ch1)
|
||||
if (chain_can_be_combined_p (ch1))
|
||||
|
@ -2403,6 +2399,78 @@ try_combine_chains (vec<chain_p> *chains)
|
|||
{
|
||||
worklist.safe_push (cch);
|
||||
chains->safe_push (cch);
|
||||
combined_p = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!combined_p)
|
||||
return;
|
||||
|
||||
/* Setup UID for all statements in dominance order. */
|
||||
basic_block *bbs = get_loop_body (loop);
|
||||
renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
|
||||
free (bbs);
|
||||
|
||||
/* Re-association in combined chains may generate statements different to
|
||||
order of references of the original chain. We need to keep references
|
||||
of combined chain in dominance order so that all uses will be inserted
|
||||
after definitions. Note:
|
||||
A) This is necessary for all combined chains.
|
||||
B) This is only necessary for ZERO distance references because other
|
||||
references inherit value from loop carried PHIs.
|
||||
|
||||
We first update position information for all combined chains. */
|
||||
dref ref;
|
||||
for (i = 0; chains->iterate (i, &ch1); ++i)
|
||||
{
|
||||
if (ch1->type != CT_COMBINATION || ch1->combined)
|
||||
continue;
|
||||
|
||||
for (j = 0; ch1->refs.iterate (j, &ref); ++j)
|
||||
ref->pos = gimple_uid (ref->stmt);
|
||||
|
||||
update_pos_for_combined_chains (ch1);
|
||||
}
|
||||
/* Then sort references according to newly updated position information. */
|
||||
for (i = 0; chains->iterate (i, &ch1); ++i)
|
||||
{
|
||||
if (ch1->type != CT_COMBINATION && !ch1->combined)
|
||||
continue;
|
||||
|
||||
/* Find the first reference with non-ZERO distance. */
|
||||
if (ch1->length == 0)
|
||||
j = ch1->refs.length();
|
||||
else
|
||||
{
|
||||
for (j = 0; ch1->refs.iterate (j, &ref); ++j)
|
||||
if (ref->distance != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Sort all ZERO distance references by position. */
|
||||
qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
|
||||
|
||||
if (ch1->combined)
|
||||
continue;
|
||||
|
||||
/* For ZERO length chain, has_max_use_after must be true since root
|
||||
combined stmt must dominates others. */
|
||||
if (ch1->length == 0)
|
||||
{
|
||||
ch1->has_max_use_after = true;
|
||||
continue;
|
||||
}
|
||||
/* Check if there is use at max distance after root for combined chains
|
||||
and set flag accordingly. */
|
||||
ch1->has_max_use_after = false;
|
||||
gimple *root_stmt = get_chain_root (ch1)->stmt;
|
||||
for (j = 1; ch1->refs.iterate (j, &ref); ++j)
|
||||
{
|
||||
if (ref->distance == ch1->length
|
||||
&& !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
|
||||
{
|
||||
ch1->has_max_use_after = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2564,7 +2632,7 @@ tree_predictive_commoning_loop (struct loop *loop)
|
|||
prepare_initializers (loop, chains);
|
||||
|
||||
/* Try to combine the chains that are always worked with together. */
|
||||
try_combine_chains (&chains);
|
||||
try_combine_chains (loop, &chains);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue