tree-optimization/102880 - improve CD-DCE

The PR shows a missed control-dependent DCE caused by CFG cleanup
merging a forwarder resulting in a partially degenerate PHI node.
With control-dependent DCE we need to mark control dependences
of incoming edges into PHIs as necessary but that is unnecessarily
conservative for the case when two edges have the same value.
There is no easy way to mark only a subset of control dependences
of both edges necessary so the fix is to produce forwarder blocks
where then the control dependence captures the requirements more
precisely.

For gcc.dg/tree-ssa/ssa-dom-thread-7.c the number of edges in the
CFG decrease as we have commonized PHI arguments which in turn
results in different threadings.  The testcase is too complex
and the dump scanning too simple to do anything meaningful here
but to adjust the number of expected threads.

The same CFG massaging could be useful at RTL expansion time to
reduce the number of copies we need to insert on edges.

FAIL: gcc.dg/tree-ssa/ssa-hoist-4.c scan-tree-dump-times optimized "MAX_EXPR" 1

2021-11-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/102880
	* tree-ssa-dce.c (sort_phi_args): New function.
	(make_forwarders_with_degenerate_phis): Likewise.
	(perform_tree_ssa_dce): Call
	make_forwarders_with_degenerate_phis.

	* gcc.dg/tree-ssa/pr102880.c: New testcase.
	* gcc.dg/tree-ssa/pr69270-3.c: Robustify.
	* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Change the number of
	expected threadings.
This commit is contained in:
Richard Biener 2021-11-12 10:21:22 +01:00
parent f98f373dd8
commit 0452064503
4 changed files with 196 additions and 6 deletions

View File

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
void foo(void);
static int b, c, d, e, f, ah;
static short g, ai, am, aq, as;
static char an, at, av, ax, ay;
static char a(char h, char i) { return i == 0 || h && i == 1 ? 0 : h % i; }
static void ae(int h) {
if (a(b, h))
foo();
}
int main() {
ae(1);
ay = a(0, ay);
ax = a(g, aq);
at = a(0, as);
av = a(c, 1);
an = a(am, f);
int al = e || ((a(1, ah) && b) & d) == 2;
ai = al;
}
/* We should eliminate the call to foo. */
/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */

View File

@ -3,7 +3,7 @@
/* We're looking for a constant argument a PHI node. There /* We're looking for a constant argument a PHI node. There
should only be one if we unpropagate correctly. */ should only be one if we unpropagate correctly. */
/* { dg-final { scan-tree-dump-times ", 1" 1 "uncprop1"} } */ /* { dg-final { scan-tree-dump-times "<1\|, 1" 1 "uncprop1"} } */
typedef long unsigned int size_t; typedef long unsigned int size_t;
typedef union gimple_statement_d *gimple; typedef union gimple_statement_d *gimple;

View File

@ -11,7 +11,7 @@
to change decisions in switch expansion which in turn can expose new to change decisions in switch expansion which in turn can expose new
jump threading opportunities. Skip the later tests on aarch64. */ jump threading opportunities. Skip the later tests on aarch64. */
/* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" { target { ! aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" { target { ! aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump "Jumps threaded: 11" "thread2" { target { ! aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump "Jumps threaded: 7" "thread2" { target { ! aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump "Jumps threaded: 18" "thread2" { target { aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump "Jumps threaded: 18" "thread2" { target { aarch64*-*-* } } } } */
enum STATE { enum STATE {

View File

@ -67,6 +67,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h" #include "tree-scalar-evolution.h"
#include "tree-ssa-propagate.h" #include "tree-ssa-propagate.h"
#include "gimple-fold.h" #include "gimple-fold.h"
#include "tree-ssa.h"
static struct stmt_stats static struct stmt_stats
{ {
@ -1612,6 +1613,164 @@ tree_dce_done (bool aggressive)
worklist.release (); worklist.release ();
} }
/* Sort PHI argument values for make_forwarders_with_degenerate_phis. */
static int
sort_phi_args (const void *a_, const void *b_)
{
auto *a = (const std::pair<edge, hashval_t> *) a_;
auto *b = (const std::pair<edge, hashval_t> *) b_;
hashval_t ha = a->second;
hashval_t hb = b->second;
if (ha < hb)
return -1;
else if (ha > hb)
return 1;
else
return 0;
}
/* Look for a non-virtual PHIs and make a forwarder block when all PHIs
have the same argument on a set of edges. This is to not consider
control dependences of individual edges for same values but only for
the common set. */
static unsigned
make_forwarders_with_degenerate_phis (function *fn)
{
unsigned todo = 0;
basic_block bb;
FOR_EACH_BB_FN (bb, fn)
{
/* Only PHIs with three or more arguments have opportunities. */
if (EDGE_COUNT (bb->preds) < 3)
continue;
/* Do not touch loop headers. */
if (bb->loop_father->header == bb)
continue;
/* Take one PHI node as template to look for identical
arguments. Build a vector of candidates forming sets
of argument edges with equal values. Note optimality
depends on the particular choice of the template PHI
since equal arguments are unordered leaving other PHIs
with more than one set of equal arguments within this
argument range unsorted. We'd have to break ties by
looking at other PHI nodes. */
gphi_iterator gsi = gsi_start_nonvirtual_phis (bb);
if (gsi_end_p (gsi))
continue;
gphi *phi = gsi.phi ();
auto_vec<std::pair<edge, hashval_t>, 8> args;
for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
{
edge e = gimple_phi_arg_edge (phi, i);
/* Skip abnormal edges since we cannot redirect them. */
if (e->flags & EDGE_ABNORMAL)
continue;
/* Skip loop exit edges when we are in loop-closed SSA form
since the forwarder we'd create does not have a PHI node. */
if (loops_state_satisfies_p (LOOP_CLOSED_SSA)
&& loop_exit_edge_p (e->src->loop_father, e))
continue;
args.safe_push (std::make_pair (e, iterative_hash_expr
(gimple_phi_arg_def (phi, i), 0)));
}
if (args.length () < 2)
continue;
args.qsort (sort_phi_args);
/* From the candidates vector now verify true candidates for
forwarders and create them. */
gphi *vphi = get_virtual_phi (bb);
unsigned start = 0;
while (start < args.length () - 1)
{
unsigned i;
for (i = start + 1; i < args.length (); ++i)
if (!operand_equal_p (PHI_ARG_DEF_FROM_EDGE (phi, args[start].first),
PHI_ARG_DEF_FROM_EDGE (phi, args[i].first)))
break;
/* args[start]..args[i-1] are equal. */
if (start != i - 1)
{
/* Check all PHI nodes for argument equality. */
bool equal = true;
gphi_iterator gsi2 = gsi;
gsi_next (&gsi2);
for (; !gsi_end_p (gsi2); gsi_next (&gsi2))
{
gphi *phi2 = gsi2.phi ();
if (virtual_operand_p (gimple_phi_result (phi2)))
continue;
tree start_arg
= PHI_ARG_DEF_FROM_EDGE (phi2, args[start].first);
for (unsigned j = start + 1; j < i; ++j)
{
if (!operand_equal_p (start_arg,
PHI_ARG_DEF_FROM_EDGE
(phi2, args[j].first)))
{
/* Another PHI might have a shorter set of
equivalent args. Go for that. */
i = j;
if (j == start + 1)
equal = false;
break;
}
}
if (!equal)
break;
}
if (equal)
{
/* If we are asked to forward all edges the block
has all degenerate PHIs. Do nothing in that case. */
if (start == 0
&& i == args.length ()
&& args.length () == gimple_phi_num_args (phi))
break;
/* Instead of using make_forwarder_block we are
rolling our own variant knowing that the forwarder
does not need PHI nodes apart from eventually
a virtual one. */
auto_vec<tree, 8> vphi_args;
if (vphi)
{
vphi_args.reserve_exact (i - start);
for (unsigned j = start; j < i; ++j)
vphi_args.quick_push
(PHI_ARG_DEF_FROM_EDGE (vphi, args[j].first));
}
free_dominance_info (fn, CDI_DOMINATORS);
basic_block forwarder = split_edge (args[start].first);
for (unsigned j = start + 1; j < i; ++j)
{
edge e = args[j].first;
redirect_edge_and_branch_force (e, forwarder);
redirect_edge_var_map_clear (e);
}
if (vphi)
{
tree def = copy_ssa_name (vphi_args[0]);
gphi *vphi_copy = create_phi_node (def, forwarder);
for (unsigned j = start; j < i; ++j)
add_phi_arg (vphi_copy, vphi_args[j - start],
args[j].first, UNKNOWN_LOCATION);
SET_PHI_ARG_DEF
(vphi, single_succ_edge (forwarder)->dest_idx, def);
}
todo |= TODO_cleanup_cfg;
}
}
/* Continue searching for more opportunities. */
start = i;
}
}
return todo;
}
/* Main routine to eliminate dead code. /* Main routine to eliminate dead code.
AGGRESSIVE controls the aggressiveness of the algorithm. AGGRESSIVE controls the aggressiveness of the algorithm.
@ -1630,8 +1789,7 @@ static unsigned int
perform_tree_ssa_dce (bool aggressive) perform_tree_ssa_dce (bool aggressive)
{ {
bool something_changed = 0; bool something_changed = 0;
unsigned todo = 0;
calculate_dominance_info (CDI_DOMINATORS);
/* Preheaders are needed for SCEV to work. /* Preheaders are needed for SCEV to work.
Simple lateches and recorded exits improve chances that loop will Simple lateches and recorded exits improve chances that loop will
@ -1644,6 +1802,11 @@ perform_tree_ssa_dce (bool aggressive)
| LOOPS_HAVE_RECORDED_EXITS); | LOOPS_HAVE_RECORDED_EXITS);
} }
if (aggressive)
todo |= make_forwarders_with_degenerate_phis (cfun);
calculate_dominance_info (CDI_DOMINATORS);
tree_dce_init (aggressive); tree_dce_init (aggressive);
if (aggressive) if (aggressive)
@ -1701,9 +1864,9 @@ perform_tree_ssa_dce (bool aggressive)
free_numbers_of_iterations_estimates (cfun); free_numbers_of_iterations_estimates (cfun);
if (in_loop_pipeline) if (in_loop_pipeline)
scev_reset (); scev_reset ();
return TODO_update_ssa | TODO_cleanup_cfg; todo |= TODO_update_ssa | TODO_cleanup_cfg;
} }
return 0; return todo;
} }
/* Pass entry points. */ /* Pass entry points. */