From c87550223ac148a5c50b7b3c785ef1f1f5ffd3ac Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Thu, 16 Mar 2017 13:21:33 -0600 Subject: [PATCH] re PR tree-optimization/71437 (Performance regression after r235817) PR tree-optimization/71437 * tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge member function. Implementation moved into after_dom_children member function and into the threader's thread_outgoing_edges function. (dom_opt_dom_walker::after_dom_children): Simplify by moving some code into new thread_outgoing_edges. * tree-ssa-threadedge.c (thread_across_edge): Make static and simplify definition. Simplify marker handling (do it here). Assume we always have the available expression and the const/copies tables. (thread_outgoing_edges): New function extracted from tree-ssa-dom.c and tree-vrp.c * tree-ssa-threadedge.h (thread_outgoing_edges): Declare. * tree-vrp.c (equiv_stack): No longer file scoped. (vrp_dom_walker): New class. (vrp_dom_walker::before_dom_children): New member function. (vrp_dom_walker::after_dom_children): Likewise. (identify_jump_threads): Setup domwalker. Use it rather than walking edges in a random order by hand. Simplify setup/finalization. (finalize_jump_threads): Remove. (vrp_finalize): Do not call identify_jump_threads here. (execute_vrp): Do it here instead and call thread_through_all_blocks here too. From-SVN: r246208 --- gcc/ChangeLog | 24 ++++++ gcc/tree-ssa-dom.c | 71 ++-------------- gcc/tree-ssa-threadedge.c | 77 +++++++++++++++-- gcc/tree-ssa-threadedge.h | 10 +-- gcc/tree-vrp.c | 171 ++++++++++++++++++++------------------ 5 files changed, 193 insertions(+), 160 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c8e5bdc7826..96e1592daae 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,29 @@ 2017-03-16 Jeff Law + PR tree-optimization/71437 + * tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge + member function. Implementation moved into after_dom_children + member function and into the threader's thread_outgoing_edges + function. + (dom_opt_dom_walker::after_dom_children): Simplify by moving + some code into new thread_outgoing_edges. + * tree-ssa-threadedge.c (thread_across_edge): Make static and simplify + definition. Simplify marker handling (do it here). Assume we always + have the available expression and the const/copies tables. + (thread_outgoing_edges): New function extracted from tree-ssa-dom.c + and tree-vrp.c + * tree-ssa-threadedge.h (thread_outgoing_edges): Declare. + * tree-vrp.c (equiv_stack): No longer file scoped. + (vrp_dom_walker): New class. + (vrp_dom_walker::before_dom_children): New member function. + (vrp_dom_walker::after_dom_children): Likewise. + (identify_jump_threads): Setup domwalker. Use it rather than + walking edges in a random order by hand. Simplify setup/finalization. + (finalize_jump_threads): Remove. + (vrp_finalize): Do not call identify_jump_threads here. + (execute_vrp): Do it here instead and call thread_through_all_blocks + here too. + PR tree-optimization/71437 * tree-ssa-dom.c (pfn_simplify): Add basic_block argument. All callers changed. diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c index 32468e94777..c6ffc38e1a8 100644 --- a/gcc/tree-ssa-dom.c +++ b/gcc/tree-ssa-dom.c @@ -350,7 +350,6 @@ public: virtual void after_dom_children (basic_block); private: - void thread_across_edge (edge); /* Unwindable equivalences, both const/copy and expression varieties. */ class const_and_copies *m_const_and_copies; @@ -816,39 +815,6 @@ record_temporary_equivalences (edge e, } } -/* Wrapper for common code to attempt to thread an edge. For example, - it handles lazily building the dummy condition and the bookkeeping - when jump threading is successful. */ - -void -dom_opt_dom_walker::thread_across_edge (edge e) -{ - if (! m_dummy_cond) - m_dummy_cond = - gimple_build_cond (NE_EXPR, - integer_zero_node, integer_zero_node, - NULL, NULL); - - /* Push a marker on both stacks so we can unwind the tables back to their - current state. */ - m_avail_exprs_stack->push_marker (); - m_const_and_copies->push_marker (); - - /* With all the edge equivalences in the tables, go ahead and attempt - to thread through E->dest. */ - ::thread_across_edge (m_dummy_cond, e, - m_const_and_copies, m_avail_exprs_stack, - simplify_stmt_for_jump_threading); - - /* And restore the various tables to their state before - we threaded this edge. - - XXX The code in tree-ssa-threadedge.c will restore the state of - the const_and_copies table. We we just have to restore the expression - table. */ - m_avail_exprs_stack->pop_to_marker (); -} - /* PHI nodes can create equivalences too. Ignoring any alternatives which are the same as the result, if @@ -1224,38 +1190,13 @@ dom_opt_dom_walker::before_dom_children (basic_block bb) void dom_opt_dom_walker::after_dom_children (basic_block bb) { - gimple *last; + if (! m_dummy_cond) + m_dummy_cond = gimple_build_cond (NE_EXPR, integer_zero_node, + integer_zero_node, NULL, NULL); - /* If we have an outgoing edge to a block with multiple incoming and - outgoing edges, then we may be able to thread the edge, i.e., we - may be able to statically determine which of the outgoing edges - will be traversed when the incoming edge from BB is traversed. */ - if (single_succ_p (bb) - && (single_succ_edge (bb)->flags & EDGE_ABNORMAL) == 0 - && potentially_threadable_block (single_succ (bb))) - { - thread_across_edge (single_succ_edge (bb)); - } - else if ((last = last_stmt (bb)) - && gimple_code (last) == GIMPLE_COND - && EDGE_COUNT (bb->succs) == 2 - && (EDGE_SUCC (bb, 0)->flags & EDGE_ABNORMAL) == 0 - && (EDGE_SUCC (bb, 1)->flags & EDGE_ABNORMAL) == 0) - { - edge true_edge, false_edge; - - extract_true_false_edges_from_block (bb, &true_edge, &false_edge); - - /* Only try to thread the edge if it reaches a target block with - more than one predecessor and more than one successor. */ - if (potentially_threadable_block (true_edge->dest)) - thread_across_edge (true_edge); - - /* Similarly for the ELSE arm. */ - if (potentially_threadable_block (false_edge->dest)) - thread_across_edge (false_edge); - - } + thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies, + m_avail_exprs_stack, + simplify_stmt_for_jump_threading); /* These remove expressions local to BB from the tables. */ m_avail_exprs_stack->pop_to_marker (); diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c index 7f70b40fd0c..536c4717b72 100644 --- a/gcc/tree-ssa-threadedge.c +++ b/gcc/tree-ssa-threadedge.c @@ -1100,16 +1100,18 @@ thread_through_normal_block (edge e, SIMPLIFY is a pass-specific function used to simplify statements. */ -void +static void thread_across_edge (gcond *dummy_cond, edge e, class const_and_copies *const_and_copies, class avail_exprs_stack *avail_exprs_stack, - tree (*simplify) (gimple *, gimple *, - class avail_exprs_stack *, basic_block)) + pfn_simplify simplify) { bitmap visited = BITMAP_ALLOC (NULL); + const_and_copies->push_marker (); + avail_exprs_stack->push_marker (); + stmt_count = 0; vec *path = new vec (); @@ -1132,6 +1134,7 @@ thread_across_edge (gcond *dummy_cond, propagate_threaded_block_debug_into (path->last ()->e->dest, e->dest); const_and_copies->pop_to_marker (); + avail_exprs_stack->pop_to_marker (); BITMAP_FREE (visited); register_jump_thread (path); return; @@ -1156,6 +1159,7 @@ thread_across_edge (gcond *dummy_cond, { BITMAP_FREE (visited); const_and_copies->pop_to_marker (); + avail_exprs_stack->pop_to_marker (); return; } } @@ -1182,6 +1186,7 @@ thread_across_edge (gcond *dummy_cond, if (taken_edge->flags & EDGE_ABNORMAL) { const_and_copies->pop_to_marker (); + avail_exprs_stack->pop_to_marker (); BITMAP_FREE (visited); return; } @@ -1196,8 +1201,7 @@ thread_across_edge (gcond *dummy_cond, /* Push a fresh marker so we can unwind the equivalences created for each of E->dest's successors. */ const_and_copies->push_marker (); - if (avail_exprs_stack) - avail_exprs_stack->push_marker (); + avail_exprs_stack->push_marker (); /* Avoid threading to any block we have already visited. */ bitmap_clear (visited); @@ -1240,12 +1244,71 @@ thread_across_edge (gcond *dummy_cond, delete_jump_thread_path (path); /* And unwind the equivalence table. */ - if (avail_exprs_stack) - avail_exprs_stack->pop_to_marker (); + avail_exprs_stack->pop_to_marker (); const_and_copies->pop_to_marker (); } BITMAP_FREE (visited); } const_and_copies->pop_to_marker (); + avail_exprs_stack->pop_to_marker (); +} + +/* Examine the outgoing edges from BB and conditionally + try to thread them. + + DUMMY_COND is a shared cond_expr used by condition simplification as scratch, + to avoid allocating memory. + + CONST_AND_COPIES is used to undo temporary equivalences created during the + walk of E->dest. + + The available expression table is referenced vai AVAIL_EXPRS_STACK. + + SIMPLIFY is a pass-specific function used to simplify statements. */ + +void +thread_outgoing_edges (basic_block bb, gcond *dummy_cond, + class const_and_copies *const_and_copies, + class avail_exprs_stack *avail_exprs_stack, + tree (*simplify) (gimple *, gimple *, + class avail_exprs_stack *, + basic_block)) +{ + int flags = (EDGE_IGNORE | EDGE_COMPLEX | EDGE_ABNORMAL); + gimple *last; + + /* If we have an outgoing edge to a block with multiple incoming and + outgoing edges, then we may be able to thread the edge, i.e., we + may be able to statically determine which of the outgoing edges + will be traversed when the incoming edge from BB is traversed. */ + if (single_succ_p (bb) + && (single_succ_edge (bb)->flags & flags) == 0 + && potentially_threadable_block (single_succ (bb))) + { + thread_across_edge (dummy_cond, single_succ_edge (bb), + const_and_copies, avail_exprs_stack, + simplify); + } + else if ((last = last_stmt (bb)) + && gimple_code (last) == GIMPLE_COND + && EDGE_COUNT (bb->succs) == 2 + && (EDGE_SUCC (bb, 0)->flags & flags) == 0 + && (EDGE_SUCC (bb, 1)->flags & flags) == 0) + { + edge true_edge, false_edge; + + extract_true_false_edges_from_block (bb, &true_edge, &false_edge); + + /* Only try to thread the edge if it reaches a target block with + more than one predecessor and more than one successor. */ + if (potentially_threadable_block (true_edge->dest)) + thread_across_edge (dummy_cond, true_edge, + const_and_copies, avail_exprs_stack, simplify); + + /* Similarly for the ELSE arm. */ + if (potentially_threadable_block (false_edge->dest)) + thread_across_edge (dummy_cond, false_edge, + const_and_copies, avail_exprs_stack, simplify); + } } diff --git a/gcc/tree-ssa-threadedge.h b/gcc/tree-ssa-threadedge.h index 3516f149485..49dfa9c94d4 100644 --- a/gcc/tree-ssa-threadedge.h +++ b/gcc/tree-ssa-threadedge.h @@ -30,10 +30,10 @@ extern void threadedge_initialize_values (void); extern void threadedge_finalize_values (void); extern bool potentially_threadable_block (basic_block); extern void propagate_threaded_block_debug_into (basic_block, basic_block); -extern void thread_across_edge (gcond *, edge, - const_and_copies *, - avail_exprs_stack *, - tree (*) (gimple *, gimple *, - avail_exprs_stack *, basic_block)); +extern void thread_outgoing_edges (basic_block, gcond *, + const_and_copies *, + avail_exprs_stack *, + tree (*) (gimple *, gimple *, + avail_exprs_stack *, basic_block)); #endif /* GCC_TREE_SSA_THREADEDGE_H */ diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 82ef74261e4..4a09a57d727 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -10746,9 +10746,6 @@ vrp_fold_stmt (gimple_stmt_iterator *si) return simplify_stmt_using_ranges (si); } -/* Unwindable const/copy equivalences. */ -const_and_copies *equiv_stack; - /* Return the LHS of any ASSERT_EXPR where OP appears as the first argument to the ASSERT_EXPR and in which the ASSERT_EXPR dominates BB. If no such ASSERT_EXPR is found, return OP. */ @@ -10879,6 +10876,74 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt, return NULL_TREE; } +class vrp_dom_walker : public dom_walker +{ +public: + vrp_dom_walker (cdi_direction direction, + class const_and_copies *const_and_copies, + class avail_exprs_stack *avail_exprs_stack) + : dom_walker (direction, true), + m_const_and_copies (const_and_copies), + m_avail_exprs_stack (avail_exprs_stack), + m_dummy_cond (NULL) {} + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); + +private: + class const_and_copies *m_const_and_copies; + class avail_exprs_stack *m_avail_exprs_stack; + + gcond *m_dummy_cond; +}; + +/* Called before processing dominator children of BB. We want to look + at ASSERT_EXPRs and record information from them in the appropriate + tables. + + We could look at other statements here. It's not seen as likely + to significantly increase the jump threads we discover. */ + +edge +vrp_dom_walker::before_dom_children (basic_block bb) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_assign_single_p (stmt) + && TREE_CODE (gimple_assign_rhs1 (stmt)) == ASSERT_EXPR) + { + tree lhs = gimple_assign_lhs (stmt); + tree rhs1 = gimple_assign_rhs1 (stmt); + m_const_and_copies->record_const_or_copy (lhs, + TREE_OPERAND (rhs1, 0)); + continue; + } + break; + } + return NULL; +} + +/* Called after processing dominator children of BB. This is where we + actually call into the threader. */ +void +vrp_dom_walker::after_dom_children (basic_block bb) +{ + if (!m_dummy_cond) + m_dummy_cond = gimple_build_cond (NE_EXPR, + integer_zero_node, integer_zero_node, + NULL, NULL); + + thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies, + m_avail_exprs_stack, + simplify_stmt_for_jump_threading); + + m_avail_exprs_stack->pop_to_marker (); + m_const_and_copies->pop_to_marker (); +} + /* Blocks which have more than one predecessor and more than one successor present jump threading opportunities, i.e., when the block is reached from a specific predecessor, we @@ -10902,8 +10967,6 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt, static void identify_jump_threads (void) { - basic_block bb; - gcond *dummy; int i; edge e; @@ -10926,69 +10989,15 @@ identify_jump_threads (void) /* Allocate our unwinder stack to unwind any temporary equivalences that might be recorded. */ - equiv_stack = new const_and_copies (); + const_and_copies *equiv_stack = new const_and_copies (); - /* To avoid lots of silly node creation, we create a single - conditional and just modify it in-place when attempting to - thread jumps. */ - dummy = gimple_build_cond (EQ_EXPR, - integer_zero_node, integer_zero_node, - NULL, NULL); + hash_table *avail_exprs + = new hash_table (1024); + avail_exprs_stack *avail_exprs_stack + = new class avail_exprs_stack (avail_exprs); - /* Walk through all the blocks finding those which present a - potential jump threading opportunity. We could set this up - as a dominator walker and record data during the walk, but - I doubt it's worth the effort for the classes of jump - threading opportunities we are trying to identify at this - point in compilation. */ - FOR_EACH_BB_FN (bb, cfun) - { - gimple *last; - - /* If the generic jump threading code does not find this block - interesting, then there is nothing to do. */ - if (! potentially_threadable_block (bb)) - continue; - - last = last_stmt (bb); - - /* We're basically looking for a switch or any kind of conditional with - integral or pointer type arguments. Note the type of the second - argument will be the same as the first argument, so no need to - check it explicitly. - - We also handle the case where there are no statements in the - block. This come up with forwarder blocks that are not - optimized away because they lead to a loop header. But we do - want to thread through them as we can sometimes thread to the - loop exit which is obviously profitable. */ - if (!last - || gimple_code (last) == GIMPLE_SWITCH - || (gimple_code (last) == GIMPLE_COND - && TREE_CODE (gimple_cond_lhs (last)) == SSA_NAME - && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (last))) - || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (last)))) - && (TREE_CODE (gimple_cond_rhs (last)) == SSA_NAME - || is_gimple_min_invariant (gimple_cond_rhs (last))))) - { - edge_iterator ei; - - /* We've got a block with multiple predecessors and multiple - successors which also ends in a suitable conditional or - switch statement. For each predecessor, see if we can thread - it to a specific successor. */ - FOR_EACH_EDGE (e, ei, bb->preds) - { - /* Do not thread across edges marked to ignoreor abnormal - edges in the CFG. */ - if (e->flags & (EDGE_IGNORE | EDGE_COMPLEX)) - continue; - - thread_across_edge (dummy, e, equiv_stack, NULL, - simplify_stmt_for_jump_threading); - } - } - } + vrp_dom_walker walker (CDI_DOMINATORS, equiv_stack, avail_exprs_stack); + walker.walk (cfun->cfg->x_entry_block_ptr); /* Clear EDGE_IGNORE. */ FOR_EACH_VEC_ELT (to_remove_edges, i, e) @@ -10997,19 +11006,8 @@ identify_jump_threads (void) /* We do not actually update the CFG or SSA graphs at this point as ASSERT_EXPRs are still in the IL and cfg cleanup code does not yet handle ASSERT_EXPRs gracefully. */ -} - -/* We identified all the jump threading opportunities earlier, but could - not transform the CFG at that time. This routine transforms the - CFG and arranges for the dominator tree to be rebuilt if necessary. - - Note the SSA graph update will occur during the normal TODO - processing by the pass manager. */ -static void -finalize_jump_threads (void) -{ - thread_through_all_blocks (false); delete equiv_stack; + delete avail_exprs_stack; } /* Free VRP lattice. */ @@ -11075,10 +11073,6 @@ vrp_finalize (bool warn_array_bounds_p) if (warn_array_bounds && warn_array_bounds_p) check_all_array_refs (); - - /* We must identify jump threading opportunities before we release - the datastructures built by VRP. */ - identify_jump_threads (); } /* evrp_dom_walker visits the basic blocks in the dominance order and set @@ -11670,6 +11664,11 @@ execute_vrp (bool warn_array_bounds_p) vrp_initialize (); ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node); vrp_finalize (warn_array_bounds_p); + + /* We must identify jump threading opportunities before we release + the datastructures built by VRP. */ + identify_jump_threads (); + vrp_free_lattice (); free_numbers_of_iterations_estimates (cfun); @@ -11686,7 +11685,13 @@ execute_vrp (bool warn_array_bounds_p) duplication and CFG manipulation. */ update_ssa (TODO_update_ssa); - finalize_jump_threads (); + /* We identified all the jump threading opportunities earlier, but could + not transform the CFG at that time. This routine transforms the + CFG and arranges for the dominator tree to be rebuilt if necessary. + + Note the SSA graph update will occur during the normal TODO + processing by the pass manager. */ + thread_through_all_blocks (false); /* Remove dead edges from SWITCH_EXPR optimization. This leaves the CFG in a broken state and requires a cfg_cleanup run. */