re PR tree-optimization/71437 (Performance regression after r235817)

PR tree-optimization/71437
	* tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge
	member function.  Implementation moved into after_dom_children
	member function and into the threader's thread_outgoing_edges
	function.
	(dom_opt_dom_walker::after_dom_children): Simplify by moving
	some code into new thread_outgoing_edges.
	* tree-ssa-threadedge.c (thread_across_edge): Make static and simplify
	definition.  Simplify marker handling (do it here).  Assume we always
	have the available expression and the const/copies tables.
	(thread_outgoing_edges): New function extracted from tree-ssa-dom.c
	and tree-vrp.c.
	* tree-ssa-threadedge.h (thread_outgoing_edges): Declare.
	* tree-vrp.c (equiv_stack): No longer file scoped.
	(vrp_dom_walker): New class.
	(vrp_dom_walker::before_dom_children): New member function.
	(vrp_dom_walker::after_dom_children): Likewise.
	(identify_jump_threads): Set up domwalker.  Use it rather than
	walking edges in a random order by hand.  Simplify setup/finalization.
	(finalize_jump_threads): Remove.
	(vrp_finalize): Do not call identify_jump_threads here.
	(execute_vrp): Do it here instead and call thread_through_all_blocks
	here too.

From-SVN: r246208
Jeff Law <law@redhat.com>, 2017-03-16 13:21:33 -06:00
commit c87550223a (parent 8d7437be47)
5 changed files with 193 additions and 160 deletions
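
Summary of the new interface: both the DOM and VRP jump threaders now funnel through a single entry point, thread_outgoing_edges, called from their dominator walkers' after_dom_children hooks. The sketch below condenses the declaration and calling pattern from the diffs that follow; the class name some_pass_dom_walker is a placeholder, not a type in the tree.

    /* Shared threader entry point (declared in tree-ssa-threadedge.h).  */
    extern void thread_outgoing_edges (basic_block, gcond *,
                                       const_and_copies *,
                                       avail_exprs_stack *,
                                       tree (*) (gimple *, gimple *,
                                                 avail_exprs_stack *,
                                                 basic_block));

    /* Calling pattern now shared by DOM and VRP: lazily build the scratch
       condition, hand BB's outgoing edges to the threader, then unwind the
       pass's own equivalence tables.  */
    void
    some_pass_dom_walker::after_dom_children (basic_block bb)
    {
      if (!m_dummy_cond)
        m_dummy_cond = gimple_build_cond (NE_EXPR, integer_zero_node,
                                          integer_zero_node, NULL, NULL);
      thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
                             m_avail_exprs_stack,
                             simplify_stmt_for_jump_threading);
      m_avail_exprs_stack->pop_to_marker ();
      m_const_and_copies->pop_to_marker ();
    }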

gcc/ChangeLog

@@ -1,5 +1,29 @@
2017-03-16  Jeff Law  <law@redhat.com>

	PR tree-optimization/71437
	* tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge
	member function.  Implementation moved into after_dom_children
	member function and into the threader's thread_outgoing_edges
	function.
	(dom_opt_dom_walker::after_dom_children): Simplify by moving
	some code into new thread_outgoing_edges.
	* tree-ssa-threadedge.c (thread_across_edge): Make static and simplify
	definition.  Simplify marker handling (do it here).  Assume we always
	have the available expression and the const/copies tables.
	(thread_outgoing_edges): New function extracted from tree-ssa-dom.c
	and tree-vrp.c.
	* tree-ssa-threadedge.h (thread_outgoing_edges): Declare.
	* tree-vrp.c (equiv_stack): No longer file scoped.
	(vrp_dom_walker): New class.
	(vrp_dom_walker::before_dom_children): New member function.
	(vrp_dom_walker::after_dom_children): Likewise.
	(identify_jump_threads): Set up domwalker.  Use it rather than
	walking edges in a random order by hand.  Simplify setup/finalization.
	(finalize_jump_threads): Remove.
	(vrp_finalize): Do not call identify_jump_threads here.
	(execute_vrp): Do it here instead and call thread_through_all_blocks
	here too.

	PR tree-optimization/71437
	* tree-ssa-dom.c (pfn_simplify): Add basic_block argument.  All
	callers changed.

gcc/tree-ssa-dom.c

@@ -350,7 +350,6 @@ public:
virtual void after_dom_children (basic_block);
private:
void thread_across_edge (edge);
/* Unwindable equivalences, both const/copy and expression varieties. */
class const_and_copies *m_const_and_copies;
@@ -816,39 +815,6 @@ record_temporary_equivalences (edge e,
}
}
/* Wrapper for common code to attempt to thread an edge. For example,
it handles lazily building the dummy condition and the bookkeeping
when jump threading is successful. */
void
dom_opt_dom_walker::thread_across_edge (edge e)
{
if (! m_dummy_cond)
m_dummy_cond =
gimple_build_cond (NE_EXPR,
integer_zero_node, integer_zero_node,
NULL, NULL);
/* Push a marker on both stacks so we can unwind the tables back to their
current state. */
m_avail_exprs_stack->push_marker ();
m_const_and_copies->push_marker ();
/* With all the edge equivalences in the tables, go ahead and attempt
to thread through E->dest. */
::thread_across_edge (m_dummy_cond, e,
m_const_and_copies, m_avail_exprs_stack,
simplify_stmt_for_jump_threading);
/* And restore the various tables to their state before
we threaded this edge.
XXX The code in tree-ssa-threadedge.c will restore the state of
the const_and_copies table.  We just have to restore the expression
table. */
m_avail_exprs_stack->pop_to_marker ();
}
/* PHI nodes can create equivalences too.
Ignoring any alternatives which are the same as the result, if
@@ -1224,38 +1190,13 @@ dom_opt_dom_walker::before_dom_children (basic_block bb)
void
dom_opt_dom_walker::after_dom_children (basic_block bb)
{
gimple *last;
if (! m_dummy_cond)
m_dummy_cond = gimple_build_cond (NE_EXPR, integer_zero_node,
integer_zero_node, NULL, NULL);
/* If we have an outgoing edge to a block with multiple incoming and
outgoing edges, then we may be able to thread the edge, i.e., we
may be able to statically determine which of the outgoing edges
will be traversed when the incoming edge from BB is traversed. */
if (single_succ_p (bb)
&& (single_succ_edge (bb)->flags & EDGE_ABNORMAL) == 0
&& potentially_threadable_block (single_succ (bb)))
{
thread_across_edge (single_succ_edge (bb));
}
else if ((last = last_stmt (bb))
&& gimple_code (last) == GIMPLE_COND
&& EDGE_COUNT (bb->succs) == 2
&& (EDGE_SUCC (bb, 0)->flags & EDGE_ABNORMAL) == 0
&& (EDGE_SUCC (bb, 1)->flags & EDGE_ABNORMAL) == 0)
{
edge true_edge, false_edge;
extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
/* Only try to thread the edge if it reaches a target block with
more than one predecessor and more than one successor. */
if (potentially_threadable_block (true_edge->dest))
thread_across_edge (true_edge);
/* Similarly for the ELSE arm. */
if (potentially_threadable_block (false_edge->dest))
thread_across_edge (false_edge);
}
thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
m_avail_exprs_stack,
simplify_stmt_for_jump_threading);
/* These remove expressions local to BB from the tables. */
m_avail_exprs_stack->pop_to_marker ();

gcc/tree-ssa-threadedge.c

@@ -1100,16 +1100,18 @@ thread_through_normal_block (edge e,
SIMPLIFY is a pass-specific function used to simplify statements. */
void
static void
thread_across_edge (gcond *dummy_cond,
edge e,
class const_and_copies *const_and_copies,
class avail_exprs_stack *avail_exprs_stack,
tree (*simplify) (gimple *, gimple *,
class avail_exprs_stack *, basic_block))
pfn_simplify simplify)
{
bitmap visited = BITMAP_ALLOC (NULL);
const_and_copies->push_marker ();
avail_exprs_stack->push_marker ();
stmt_count = 0;
vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
@@ -1132,6 +1134,7 @@ thread_across_edge (gcond *dummy_cond,
propagate_threaded_block_debug_into (path->last ()->e->dest,
e->dest);
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
BITMAP_FREE (visited);
register_jump_thread (path);
return;
@@ -1156,6 +1159,7 @@ thread_across_edge (gcond *dummy_cond,
{
BITMAP_FREE (visited);
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
return;
}
}
@@ -1182,6 +1186,7 @@ thread_across_edge (gcond *dummy_cond,
if (taken_edge->flags & EDGE_ABNORMAL)
{
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
BITMAP_FREE (visited);
return;
}
@@ -1196,8 +1201,7 @@ thread_across_edge (gcond *dummy_cond,
/* Push a fresh marker so we can unwind the equivalences created
for each of E->dest's successors. */
const_and_copies->push_marker ();
if (avail_exprs_stack)
avail_exprs_stack->push_marker ();
avail_exprs_stack->push_marker ();
/* Avoid threading to any block we have already visited. */
bitmap_clear (visited);
@@ -1240,12 +1244,71 @@ thread_across_edge (gcond *dummy_cond,
delete_jump_thread_path (path);
/* And unwind the equivalence table. */
if (avail_exprs_stack)
avail_exprs_stack->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
const_and_copies->pop_to_marker ();
}
BITMAP_FREE (visited);
}
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
}
/* Examine the outgoing edges from BB and conditionally
try to thread them.
DUMMY_COND is a shared cond_expr used by condition simplification as scratch,
to avoid allocating memory.
CONST_AND_COPIES is used to undo temporary equivalences created during the
walk of E->dest.
The available expression table is referenced via AVAIL_EXPRS_STACK.
SIMPLIFY is a pass-specific function used to simplify statements. */
void
thread_outgoing_edges (basic_block bb, gcond *dummy_cond,
class const_and_copies *const_and_copies,
class avail_exprs_stack *avail_exprs_stack,
tree (*simplify) (gimple *, gimple *,
class avail_exprs_stack *,
basic_block))
{
int flags = (EDGE_IGNORE | EDGE_COMPLEX | EDGE_ABNORMAL);
gimple *last;
/* If we have an outgoing edge to a block with multiple incoming and
outgoing edges, then we may be able to thread the edge, i.e., we
may be able to statically determine which of the outgoing edges
will be traversed when the incoming edge from BB is traversed. */
if (single_succ_p (bb)
&& (single_succ_edge (bb)->flags & flags) == 0
&& potentially_threadable_block (single_succ (bb)))
{
thread_across_edge (dummy_cond, single_succ_edge (bb),
const_and_copies, avail_exprs_stack,
simplify);
}
else if ((last = last_stmt (bb))
&& gimple_code (last) == GIMPLE_COND
&& EDGE_COUNT (bb->succs) == 2
&& (EDGE_SUCC (bb, 0)->flags & flags) == 0
&& (EDGE_SUCC (bb, 1)->flags & flags) == 0)
{
edge true_edge, false_edge;
extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
/* Only try to thread the edge if it reaches a target block with
more than one predecessor and more than one successor. */
if (potentially_threadable_block (true_edge->dest))
thread_across_edge (dummy_cond, true_edge,
const_and_copies, avail_exprs_stack, simplify);
/* Similarly for the ELSE arm. */
if (potentially_threadable_block (false_edge->dest))
thread_across_edge (dummy_cond, false_edge,
const_and_copies, avail_exprs_stack, simplify);
}
}
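
For reference, the SIMPLIFY callback that both entry points take now receives a basic_block argument (the pfn_simplify change referenced in the earlier ChangeLog entry). A minimal sketch of a conforming callback, with a placeholder name and a trivial body rather than the real DOM or VRP simplifier:

    static tree
    example_simplify_for_jump_threading (gimple *stmt, gimple *within_stmt,
                                         avail_exprs_stack *avail_exprs_stack,
                                         basic_block bb)
    {
      /* A real pass-specific simplifier consults its own state (value
         ranges, available expressions, ...) and returns a simplified
         value for STMT, or NULL_TREE when nothing useful is known.  */
      return NULL_TREE;
    }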

gcc/tree-ssa-threadedge.h

@@ -30,10 +30,10 @@ extern void threadedge_initialize_values (void);
extern void threadedge_finalize_values (void);
extern bool potentially_threadable_block (basic_block);
extern void propagate_threaded_block_debug_into (basic_block, basic_block);
extern void thread_across_edge (gcond *, edge,
const_and_copies *,
avail_exprs_stack *,
tree (*) (gimple *, gimple *,
avail_exprs_stack *, basic_block));
extern void thread_outgoing_edges (basic_block, gcond *,
const_and_copies *,
avail_exprs_stack *,
tree (*) (gimple *, gimple *,
avail_exprs_stack *, basic_block));
#endif /* GCC_TREE_SSA_THREADEDGE_H */

gcc/tree-vrp.c

@@ -10746,9 +10746,6 @@ vrp_fold_stmt (gimple_stmt_iterator *si)
return simplify_stmt_using_ranges (si);
}
/* Unwindable const/copy equivalences. */
const_and_copies *equiv_stack;
/* Return the LHS of any ASSERT_EXPR where OP appears as the first
argument to the ASSERT_EXPR and in which the ASSERT_EXPR dominates
BB. If no such ASSERT_EXPR is found, return OP. */
@@ -10879,6 +10876,74 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt,
return NULL_TREE;
}
class vrp_dom_walker : public dom_walker
{
public:
vrp_dom_walker (cdi_direction direction,
class const_and_copies *const_and_copies,
class avail_exprs_stack *avail_exprs_stack)
: dom_walker (direction, true),
m_const_and_copies (const_and_copies),
m_avail_exprs_stack (avail_exprs_stack),
m_dummy_cond (NULL) {}
virtual edge before_dom_children (basic_block);
virtual void after_dom_children (basic_block);
private:
class const_and_copies *m_const_and_copies;
class avail_exprs_stack *m_avail_exprs_stack;
gcond *m_dummy_cond;
};
/* Called before processing dominator children of BB. We want to look
at ASSERT_EXPRs and record information from them in the appropriate
tables.
We could look at other statements here. It's not seen as likely
to significantly increase the jump threads we discover. */
edge
vrp_dom_walker::before_dom_children (basic_block bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (gimple_assign_single_p (stmt)
&& TREE_CODE (gimple_assign_rhs1 (stmt)) == ASSERT_EXPR)
{
tree lhs = gimple_assign_lhs (stmt);
tree rhs1 = gimple_assign_rhs1 (stmt);
m_const_and_copies->record_const_or_copy (lhs,
TREE_OPERAND (rhs1, 0));
continue;
}
break;
}
return NULL;
}
/* Called after processing dominator children of BB. This is where we
actually call into the threader. */
void
vrp_dom_walker::after_dom_children (basic_block bb)
{
if (!m_dummy_cond)
m_dummy_cond = gimple_build_cond (NE_EXPR,
integer_zero_node, integer_zero_node,
NULL, NULL);
thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
m_avail_exprs_stack,
simplify_stmt_for_jump_threading);
m_avail_exprs_stack->pop_to_marker ();
m_const_and_copies->pop_to_marker ();
}
/* Blocks which have more than one predecessor and more than
one successor present jump threading opportunities, i.e.,
when the block is reached from a specific predecessor, we
@@ -10902,8 +10967,6 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt,
static void
identify_jump_threads (void)
{
basic_block bb;
gcond *dummy;
int i;
edge e;
@@ -10926,69 +10989,15 @@ identify_jump_threads (void)
/* Allocate our unwinder stack to unwind any temporary equivalences
that might be recorded. */
equiv_stack = new const_and_copies ();
const_and_copies *equiv_stack = new const_and_copies ();
/* To avoid lots of silly node creation, we create a single
conditional and just modify it in-place when attempting to
thread jumps. */
dummy = gimple_build_cond (EQ_EXPR,
integer_zero_node, integer_zero_node,
NULL, NULL);
hash_table<expr_elt_hasher> *avail_exprs
= new hash_table<expr_elt_hasher> (1024);
avail_exprs_stack *avail_exprs_stack
= new class avail_exprs_stack (avail_exprs);
/* Walk through all the blocks finding those which present a
potential jump threading opportunity. We could set this up
as a dominator walker and record data during the walk, but
I doubt it's worth the effort for the classes of jump
threading opportunities we are trying to identify at this
point in compilation. */
FOR_EACH_BB_FN (bb, cfun)
{
gimple *last;
/* If the generic jump threading code does not find this block
interesting, then there is nothing to do. */
if (! potentially_threadable_block (bb))
continue;
last = last_stmt (bb);
/* We're basically looking for a switch or any kind of conditional with
integral or pointer type arguments. Note the type of the second
argument will be the same as the first argument, so no need to
check it explicitly.
We also handle the case where there are no statements in the
block. This comes up with forwarder blocks that are not
optimized away because they lead to a loop header. But we do
want to thread through them as we can sometimes thread to the
loop exit which is obviously profitable. */
if (!last
|| gimple_code (last) == GIMPLE_SWITCH
|| (gimple_code (last) == GIMPLE_COND
&& TREE_CODE (gimple_cond_lhs (last)) == SSA_NAME
&& (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (last)))
|| POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (last))))
&& (TREE_CODE (gimple_cond_rhs (last)) == SSA_NAME
|| is_gimple_min_invariant (gimple_cond_rhs (last)))))
{
edge_iterator ei;
/* We've got a block with multiple predecessors and multiple
successors which also ends in a suitable conditional or
switch statement. For each predecessor, see if we can thread
it to a specific successor. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
/* Do not thread across edges marked to ignore or abnormal
edges in the CFG. */
if (e->flags & (EDGE_IGNORE | EDGE_COMPLEX))
continue;
thread_across_edge (dummy, e, equiv_stack, NULL,
simplify_stmt_for_jump_threading);
}
}
}
vrp_dom_walker walker (CDI_DOMINATORS, equiv_stack, avail_exprs_stack);
walker.walk (cfun->cfg->x_entry_block_ptr);
/* Clear EDGE_IGNORE. */
FOR_EACH_VEC_ELT (to_remove_edges, i, e)
@@ -10997,19 +11006,8 @@ identify_jump_threads (void)
/* We do not actually update the CFG or SSA graphs at this point as
ASSERT_EXPRs are still in the IL and cfg cleanup code does not yet
handle ASSERT_EXPRs gracefully. */
}
/* We identified all the jump threading opportunities earlier, but could
not transform the CFG at that time. This routine transforms the
CFG and arranges for the dominator tree to be rebuilt if necessary.
Note the SSA graph update will occur during the normal TODO
processing by the pass manager. */
static void
finalize_jump_threads (void)
{
thread_through_all_blocks (false);
delete equiv_stack;
delete avail_exprs_stack;
}
/* Free VRP lattice. */
@@ -11075,10 +11073,6 @@ vrp_finalize (bool warn_array_bounds_p)
if (warn_array_bounds && warn_array_bounds_p)
check_all_array_refs ();
/* We must identify jump threading opportunities before we release
the datastructures built by VRP. */
identify_jump_threads ();
}
/* evrp_dom_walker visits the basic blocks in the dominance order and set
@@ -11670,6 +11664,11 @@ execute_vrp (bool warn_array_bounds_p)
vrp_initialize ();
ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
vrp_finalize (warn_array_bounds_p);
/* We must identify jump threading opportunities before we release
the datastructures built by VRP. */
identify_jump_threads ();
vrp_free_lattice ();
free_numbers_of_iterations_estimates (cfun);
@@ -11686,7 +11685,13 @@ execute_vrp (bool warn_array_bounds_p)
duplication and CFG manipulation. */
update_ssa (TODO_update_ssa);
finalize_jump_threads ();
/* We identified all the jump threading opportunities earlier, but could
not transform the CFG at that time. This routine transforms the
CFG and arranges for the dominator tree to be rebuilt if necessary.
Note the SSA graph update will occur during the normal TODO
processing by the pass manager. */
thread_through_all_blocks (false);
/* Remove dead edges from SWITCH_EXPR optimization. This leaves the
CFG in a broken state and requires a cfg_cleanup run. */