re PR tree-optimization/71437 (Performance regression after r235817)

PR tree-optimization/71437
	* tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge
	member function.  Implementation moved into after_dom_children
	member function and into the threader's thread_outgoing_edges
	function.
	(dom_opt_dom_walker::after_dom_children): Simplify by moving
	some code into new thread_outgoing_edges.
	* tree-ssa-threadedge.c (thread_across_edge): Make static and simplify
	definition.  Simplify marker handling (do it here).  Assume we always
	have the available expression and the const/copies tables.
	(thread_outgoing_edges): New function extracted from tree-ssa-dom.c
	and tree-vrp.c.
	* tree-ssa-threadedge.h (thread_outgoing_edges): Declare.
	* tree-vrp.c (equiv_stack): No longer file scoped.
	(vrp_dom_walker): New class.
	(vrp_dom_walker::before_dom_children): New member function.
	(vrp_dom_walker::after_dom_children): Likewise.
	(identify_jump_threads): Set up domwalker.  Use it rather than
	walking edges in a random order by hand.  Simplify setup/finalization.
	(finalize_jump_threads): Remove.
	(vrp_finalize): Do not call identify_jump_threads here.
	(execute_vrp): Do it here instead and call thread_through_all_blocks
	here too.

From-SVN: r246208
Jeff Law <law@redhat.com>, 2017-03-16 13:21:33 -06:00
commit c87550223a (parent 8d7437be47)
5 changed files with 193 additions and 160 deletions
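
Summary of the new interface: both the DOM and VRP jump threaders now funnel through a single entry point, thread_outgoing_edges, called from their dominator walkers' after_dom_children hooks. The sketch below condenses the declaration and calling pattern from the diffs that follow; the class name some_pass_dom_walker is a placeholder, not a type in the tree.

    /* Shared threader entry point (declared in tree-ssa-threadedge.h).  */
    extern void thread_outgoing_edges (basic_block, gcond *,
                                       const_and_copies *,
                                       avail_exprs_stack *,
                                       tree (*) (gimple *, gimple *,
                                                 avail_exprs_stack *,
                                                 basic_block));

    /* Calling pattern now shared by DOM and VRP: lazily build the scratch
       condition, hand BB's outgoing edges to the threader, then unwind the
       pass's own equivalence tables.  */
    void
    some_pass_dom_walker::after_dom_children (basic_block bb)
    {
      if (!m_dummy_cond)
        m_dummy_cond = gimple_build_cond (NE_EXPR, integer_zero_node,
                                          integer_zero_node, NULL, NULL);
      thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
                             m_avail_exprs_stack,
                             simplify_stmt_for_jump_threading);
      m_avail_exprs_stack->pop_to_marker ();
      m_const_and_copies->pop_to_marker ();
    }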

gcc/ChangeLog

@@ -1,5 +1,29 @@
2017-03-16  Jeff Law  <law@redhat.com>

	PR tree-optimization/71437
	* tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge
	member function.  Implementation moved into after_dom_children
	member function and into the threader's thread_outgoing_edges
	function.
	(dom_opt_dom_walker::after_dom_children): Simplify by moving
	some code into new thread_outgoing_edges.
	* tree-ssa-threadedge.c (thread_across_edge): Make static and simplify
	definition.  Simplify marker handling (do it here).  Assume we always
	have the available expression and the const/copies tables.
	(thread_outgoing_edges): New function extracted from tree-ssa-dom.c
	and tree-vrp.c.
	* tree-ssa-threadedge.h (thread_outgoing_edges): Declare.
	* tree-vrp.c (equiv_stack): No longer file scoped.
	(vrp_dom_walker): New class.
	(vrp_dom_walker::before_dom_children): New member function.
	(vrp_dom_walker::after_dom_children): Likewise.
	(identify_jump_threads): Set up domwalker.  Use it rather than
	walking edges in a random order by hand.  Simplify setup/finalization.
	(finalize_jump_threads): Remove.
	(vrp_finalize): Do not call identify_jump_threads here.
	(execute_vrp): Do it here instead and call thread_through_all_blocks
	here too.

	PR tree-optimization/71437
	* tree-ssa-dom.c (pfn_simplify): Add basic_block argument.  All
	callers changed.

gcc/tree-ssa-dom.c

@@ -350,7 +350,6 @@ public:
virtual void after_dom_children (basic_block);
private:
void thread_across_edge (edge);
/* Unwindable equivalences, both const/copy and expression varieties. */
class const_and_copies *m_const_and_copies;
@@ -816,39 +815,6 @@ record_temporary_equivalences (edge e,
}
}
/* Wrapper for common code to attempt to thread an edge. For example,
it handles lazily building the dummy condition and the bookkeeping
when jump threading is successful. */
void
dom_opt_dom_walker::thread_across_edge (edge e)
{
if (! m_dummy_cond)
m_dummy_cond =
gimple_build_cond (NE_EXPR,
integer_zero_node, integer_zero_node,
NULL, NULL);
/* Push a marker on both stacks so we can unwind the tables back to their
current state. */
m_avail_exprs_stack->push_marker ();
m_const_and_copies->push_marker ();
/* With all the edge equivalences in the tables, go ahead and attempt
to thread through E->dest. */
::thread_across_edge (m_dummy_cond, e,
m_const_and_copies, m_avail_exprs_stack,
simplify_stmt_for_jump_threading);
/* And restore the various tables to their state before
we threaded this edge.
XXX The code in tree-ssa-threadedge.c will restore the state of
the const_and_copies table.  We just have to restore the expression
table. */
m_avail_exprs_stack->pop_to_marker ();
}
/* PHI nodes can create equivalences too.
Ignoring any alternatives which are the same as the result, if
@@ -1224,38 +1190,13 @@ dom_opt_dom_walker::before_dom_children (basic_block bb)
void
dom_opt_dom_walker::after_dom_children (basic_block bb)
{
gimple *last;
if (! m_dummy_cond)
m_dummy_cond = gimple_build_cond (NE_EXPR, integer_zero_node,
integer_zero_node, NULL, NULL);
/* If we have an outgoing edge to a block with multiple incoming and
outgoing edges, then we may be able to thread the edge, i.e., we
may be able to statically determine which of the outgoing edges
will be traversed when the incoming edge from BB is traversed. */
if (single_succ_p (bb)
&& (single_succ_edge (bb)->flags & EDGE_ABNORMAL) == 0
&& potentially_threadable_block (single_succ (bb)))
{
thread_across_edge (single_succ_edge (bb));
}
else if ((last = last_stmt (bb))
&& gimple_code (last) == GIMPLE_COND
&& EDGE_COUNT (bb->succs) == 2
&& (EDGE_SUCC (bb, 0)->flags & EDGE_ABNORMAL) == 0
&& (EDGE_SUCC (bb, 1)->flags & EDGE_ABNORMAL) == 0)
{
edge true_edge, false_edge;
extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
/* Only try to thread the edge if it reaches a target block with
more than one predecessor and more than one successor. */
if (potentially_threadable_block (true_edge->dest))
thread_across_edge (true_edge);
/* Similarly for the ELSE arm. */
if (potentially_threadable_block (false_edge->dest))
thread_across_edge (false_edge);
}
thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
m_avail_exprs_stack,
simplify_stmt_for_jump_threading);
/* These remove expressions local to BB from the tables. */
m_avail_exprs_stack->pop_to_marker ();

gcc/tree-ssa-threadedge.c

@@ -1100,16 +1100,18 @@ thread_through_normal_block (edge e,
SIMPLIFY is a pass-specific function used to simplify statements. */
void
static void
thread_across_edge (gcond *dummy_cond,
edge e,
class const_and_copies *const_and_copies,
class avail_exprs_stack *avail_exprs_stack,
tree (*simplify) (gimple *, gimple *,
class avail_exprs_stack *, basic_block))
pfn_simplify simplify)
{
bitmap visited = BITMAP_ALLOC (NULL);
const_and_copies->push_marker ();
avail_exprs_stack->push_marker ();
stmt_count = 0;
vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
@@ -1132,6 +1134,7 @@ thread_across_edge (gcond *dummy_cond,
propagate_threaded_block_debug_into (path->last ()->e->dest,
e->dest);
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
BITMAP_FREE (visited);
register_jump_thread (path);
return;
@@ -1156,6 +1159,7 @@ thread_across_edge (gcond *dummy_cond,
{
BITMAP_FREE (visited);
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
return;
}
}
@@ -1182,6 +1186,7 @@ thread_across_edge (gcond *dummy_cond,
if (taken_edge->flags & EDGE_ABNORMAL)
{
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
BITMAP_FREE (visited);
return;
}
@@ -1196,8 +1201,7 @@ thread_across_edge (gcond *dummy_cond,
/* Push a fresh marker so we can unwind the equivalences created
for each of E->dest's successors. */
const_and_copies->push_marker ();
if (avail_exprs_stack)
avail_exprs_stack->push_marker ();
avail_exprs_stack->push_marker ();
/* Avoid threading to any block we have already visited. */
bitmap_clear (visited);
@@ -1240,12 +1244,71 @@ thread_across_edge (gcond *dummy_cond,
delete_jump_thread_path (path);
/* And unwind the equivalence table. */
if (avail_exprs_stack)
avail_exprs_stack->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
const_and_copies->pop_to_marker ();
}
BITMAP_FREE (visited);
}
const_and_copies->pop_to_marker ();
avail_exprs_stack->pop_to_marker ();
}
/* Examine the outgoing edges from BB and conditionally
try to thread them.
DUMMY_COND is a shared cond_expr used by condition simplification as scratch,
to avoid allocating memory.
CONST_AND_COPIES is used to undo temporary equivalences created during the
walk of E->dest.
The available expression table is referenced via AVAIL_EXPRS_STACK.
SIMPLIFY is a pass-specific function used to simplify statements. */
void
thread_outgoing_edges (basic_block bb, gcond *dummy_cond,
class const_and_copies *const_and_copies,
class avail_exprs_stack *avail_exprs_stack,
tree (*simplify) (gimple *, gimple *,
class avail_exprs_stack *,
basic_block))
{
int flags = (EDGE_IGNORE | EDGE_COMPLEX | EDGE_ABNORMAL);
gimple *last;
/* If we have an outgoing edge to a block with multiple incoming and
outgoing edges, then we may be able to thread the edge, i.e., we
may be able to statically determine which of the outgoing edges
will be traversed when the incoming edge from BB is traversed. */
if (single_succ_p (bb)
&& (single_succ_edge (bb)->flags & flags) == 0
&& potentially_threadable_block (single_succ (bb)))
{
thread_across_edge (dummy_cond, single_succ_edge (bb),
const_and_copies, avail_exprs_stack,
simplify);
}
else if ((last = last_stmt (bb))
&& gimple_code (last) == GIMPLE_COND
&& EDGE_COUNT (bb->succs) == 2
&& (EDGE_SUCC (bb, 0)->flags & flags) == 0
&& (EDGE_SUCC (bb, 1)->flags & flags) == 0)
{
edge true_edge, false_edge;
extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
/* Only try to thread the edge if it reaches a target block with
more than one predecessor and more than one successor. */
if (potentially_threadable_block (true_edge->dest))
thread_across_edge (dummy_cond, true_edge,
const_and_copies, avail_exprs_stack, simplify);
/* Similarly for the ELSE arm. */
if (potentially_threadable_block (false_edge->dest))
thread_across_edge (dummy_cond, false_edge,
const_and_copies, avail_exprs_stack, simplify);
}
}
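
For reference, the SIMPLIFY callback that both entry points take now receives a basic_block argument (the pfn_simplify change referenced in the earlier ChangeLog entry). A minimal sketch of a conforming callback, with a placeholder name and a trivial body rather than the real DOM or VRP simplifier:

    static tree
    example_simplify_for_jump_threading (gimple *stmt, gimple *within_stmt,
                                         avail_exprs_stack *avail_exprs_stack,
                                         basic_block bb)
    {
      /* A real pass-specific simplifier consults its own state (value
         ranges, available expressions, ...) and returns a simplified
         value for STMT, or NULL_TREE when nothing useful is known.  */
      return NULL_TREE;
    }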

gcc/tree-ssa-threadedge.h

@@ -30,10 +30,10 @@ extern void threadedge_initialize_values (void);
extern void threadedge_finalize_values (void);
extern bool potentially_threadable_block (basic_block);
extern void propagate_threaded_block_debug_into (basic_block, basic_block);
extern void thread_across_edge (gcond *, edge,
const_and_copies *,
avail_exprs_stack *,
tree (*) (gimple *, gimple *,
avail_exprs_stack *, basic_block));
extern void thread_outgoing_edges (basic_block, gcond *,
const_and_copies *,
avail_exprs_stack *,
tree (*) (gimple *, gimple *,
avail_exprs_stack *, basic_block));
#endif /* GCC_TREE_SSA_THREADEDGE_H */

gcc/tree-vrp.c

@@ -10746,9 +10746,6 @@ vrp_fold_stmt (gimple_stmt_iterator *si)
return simplify_stmt_using_ranges (si);
}
/* Unwindable const/copy equivalences. */
const_and_copies *equiv_stack;
/* Return the LHS of any ASSERT_EXPR where OP appears as the first
argument to the ASSERT_EXPR and in which the ASSERT_EXPR dominates
BB. If no such ASSERT_EXPR is found, return OP. */
@@ -10879,6 +10876,74 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt,
return NULL_TREE;
}
class vrp_dom_walker : public dom_walker
{
public:
vrp_dom_walker (cdi_direction direction,
class const_and_copies *const_and_copies,
class avail_exprs_stack *avail_exprs_stack)
: dom_walker (direction, true),
m_const_and_copies (const_and_copies),
m_avail_exprs_stack (avail_exprs_stack),
m_dummy_cond (NULL) {}
virtual edge before_dom_children (basic_block);
virtual void after_dom_children (basic_block);
private:
class const_and_copies *m_const_and_copies;
class avail_exprs_stack *m_avail_exprs_stack;
gcond *m_dummy_cond;
};
/* Called before processing dominator children of BB. We want to look
at ASSERT_EXPRs and record information from them in the appropriate
tables.
We could look at other statements here. It's not seen as likely
to significantly increase the jump threads we discover. */
edge
vrp_dom_walker::before_dom_children (basic_block bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (gimple_assign_single_p (stmt)
&& TREE_CODE (gimple_assign_rhs1 (stmt)) == ASSERT_EXPR)
{
tree lhs = gimple_assign_lhs (stmt);
tree rhs1 = gimple_assign_rhs1 (stmt);
m_const_and_copies->record_const_or_copy (lhs,
TREE_OPERAND (rhs1, 0));
continue;
}
break;
}
return NULL;
}
/* Called after processing dominator children of BB. This is where we
actually call into the threader. */
void
vrp_dom_walker::after_dom_children (basic_block bb)
{
if (!m_dummy_cond)
m_dummy_cond = gimple_build_cond (NE_EXPR,
integer_zero_node, integer_zero_node,
NULL, NULL);
thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
m_avail_exprs_stack,
simplify_stmt_for_jump_threading);
m_avail_exprs_stack->pop_to_marker ();
m_const_and_copies->pop_to_marker ();
}
/* Blocks which have more than one predecessor and more than
one successor present jump threading opportunities, i.e.,
when the block is reached from a specific predecessor, we
@@ -10902,8 +10967,6 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt,
static void
identify_jump_threads (void)
{
basic_block bb;
gcond *dummy;
int i;
edge e;
@@ -10926,69 +10989,15 @@ identify_jump_threads (void)
/* Allocate our unwinder stack to unwind any temporary equivalences
that might be recorded. */
equiv_stack = new const_and_copies ();
const_and_copies *equiv_stack = new const_and_copies ();
/* To avoid lots of silly node creation, we create a single
conditional and just modify it in-place when attempting to
thread jumps. */
dummy = gimple_build_cond (EQ_EXPR,
integer_zero_node, integer_zero_node,
NULL, NULL);
hash_table<expr_elt_hasher> *avail_exprs
= new hash_table<expr_elt_hasher> (1024);
avail_exprs_stack *avail_exprs_stack
= new class avail_exprs_stack (avail_exprs);
/* Walk through all the blocks finding those which present a
potential jump threading opportunity. We could set this up
as a dominator walker and record data during the walk, but
I doubt it's worth the effort for the classes of jump
threading opportunities we are trying to identify at this
point in compilation. */
FOR_EACH_BB_FN (bb, cfun)
{
gimple *last;
/* If the generic jump threading code does not find this block
interesting, then there is nothing to do. */
if (! potentially_threadable_block (bb))
continue;
last = last_stmt (bb);
/* We're basically looking for a switch or any kind of conditional with
integral or pointer type arguments. Note the type of the second
argument will be the same as the first argument, so no need to
check it explicitly.
We also handle the case where there are no statements in the
block. This comes up with forwarder blocks that are not
optimized away because they lead to a loop header. But we do
want to thread through them as we can sometimes thread to the
loop exit which is obviously profitable. */
if (!last
|| gimple_code (last) == GIMPLE_SWITCH
|| (gimple_code (last) == GIMPLE_COND
&& TREE_CODE (gimple_cond_lhs (last)) == SSA_NAME
&& (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (last)))
|| POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (last))))
&& (TREE_CODE (gimple_cond_rhs (last)) == SSA_NAME
|| is_gimple_min_invariant (gimple_cond_rhs (last)))))
{
edge_iterator ei;
/* We've got a block with multiple predecessors and multiple
successors which also ends in a suitable conditional or
switch statement. For each predecessor, see if we can thread
it to a specific successor. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
/* Do not thread across edges marked to ignore or abnormal
edges in the CFG. */
if (e->flags & (EDGE_IGNORE | EDGE_COMPLEX))
continue;
thread_across_edge (dummy, e, equiv_stack, NULL,
simplify_stmt_for_jump_threading);
}
}
}
vrp_dom_walker walker (CDI_DOMINATORS, equiv_stack, avail_exprs_stack);
walker.walk (cfun->cfg->x_entry_block_ptr);
/* Clear EDGE_IGNORE. */
FOR_EACH_VEC_ELT (to_remove_edges, i, e)
@@ -10997,19 +11006,8 @@ identify_jump_threads (void)
/* We do not actually update the CFG or SSA graphs at this point as
ASSERT_EXPRs are still in the IL and cfg cleanup code does not yet
handle ASSERT_EXPRs gracefully. */
}
/* We identified all the jump threading opportunities earlier, but could
not transform the CFG at that time. This routine transforms the
CFG and arranges for the dominator tree to be rebuilt if necessary.
Note the SSA graph update will occur during the normal TODO
processing by the pass manager. */
static void
finalize_jump_threads (void)
{
thread_through_all_blocks (false);
delete equiv_stack;
delete avail_exprs_stack;
}
/* Free VRP lattice. */
@@ -11075,10 +11073,6 @@ vrp_finalize (bool warn_array_bounds_p)
if (warn_array_bounds && warn_array_bounds_p)
check_all_array_refs ();
/* We must identify jump threading opportunities before we release
the datastructures built by VRP. */
identify_jump_threads ();
}
/* evrp_dom_walker visits the basic blocks in the dominance order and set
@@ -11670,6 +11664,11 @@ execute_vrp (bool warn_array_bounds_p)
vrp_initialize ();
ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
vrp_finalize (warn_array_bounds_p);
/* We must identify jump threading opportunities before we release
the datastructures built by VRP. */
identify_jump_threads ();
vrp_free_lattice ();
free_numbers_of_iterations_estimates (cfun);
@@ -11686,7 +11685,13 @@ execute_vrp (bool warn_array_bounds_p)
duplication and CFG manipulation. */
update_ssa (TODO_update_ssa);
finalize_jump_threads ();
/* We identified all the jump threading opportunities earlier, but could
not transform the CFG at that time. This routine transforms the
CFG and arranges for the dominator tree to be rebuilt if necessary.
Note the SSA graph update will occur during the normal TODO
processing by the pass manager. */
thread_through_all_blocks (false);
/* Remove dead edges from SWITCH_EXPR optimization. This leaves the
CFG in a broken state and requires a cfg_cleanup run. */