From 2f07b722ed0302ded58710b6d8485d85d785b4ee Mon Sep 17 00:00:00 2001
From: Bin Cheng <bin.cheng@arm.com>
Date: Tue, 2 Jun 2015 10:19:18 +0000
Subject: [PATCH] re PR tree-optimization/48052 (loop not vectorized if index
 is "unsigned int")

	PR tree-optimization/48052
	* cfgloop.h (struct control_iv): New.
	(struct loop): New field control_ivs.
	* tree-ssa-loop-niter.c : Include "stor-layout.h".
	(number_of_iterations_lt): Set no_overflow information.
	(number_of_iterations_exit): Init control iv in niter struct.
	(record_control_iv): New.
	(estimate_numbers_of_iterations_loop): Call record_control_iv.
	(loop_exits_before_overflow): New.  Interface factored out of
	scev_probably_wraps_p.
	(scev_probably_wraps_p): Factor loop niter related code into
	loop_exits_before_overflow.
	(free_numbers_of_iterations_estimates_loop): Free control ivs.
	* tree-ssa-loop-niter.h (free_loop_control_ivs): New.

	gcc/testsuite/ChangeLog
	PR tree-optimization/48052
	* gcc.dg/tree-ssa/scev-8.c: New.
	* gcc.dg/tree-ssa/scev-9.c: New.
	* gcc.dg/tree-ssa/scev-10.c: New.
	* gcc.dg/vect/pr48052.c: New.

From-SVN: r224020
---
 gcc/ChangeLog                           |  17 ++
 gcc/cfgloop.h                           |  11 +
 gcc/testsuite/ChangeLog                 |   8 +
 gcc/testsuite/gcc.dg/tree-ssa/scev-10.c |  23 ++
 gcc/testsuite/gcc.dg/tree-ssa/scev-8.c  |  62 +++++
 gcc/testsuite/gcc.dg/tree-ssa/scev-9.c  |  22 ++
 gcc/testsuite/gcc.dg/vect/pr48052.c     |  26 +++
 gcc/tree-ssa-loop-niter.c               | 288 +++++++++++++++++++-----
 gcc/tree-ssa-loop-niter.h               |   1 +
 9 files changed, 397 insertions(+), 61 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr48052.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ac791c4bf5d..9f3ec684279 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2015-06-02  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/48052
+	* cfgloop.h (struct control_iv): New.
+	(struct loop): New field control_ivs.
+	* tree-ssa-loop-niter.c : Include "stor-layout.h".
+	(number_of_iterations_lt): Set no_overflow information.
+	(number_of_iterations_exit): Init control iv in niter struct.
+	(record_control_iv): New.
+	(estimate_numbers_of_iterations_loop): Call record_control_iv.
+	(loop_exits_before_overflow): New.  Interface factored out of
+	scev_probably_wraps_p.
+	(scev_probably_wraps_p): Factor loop niter related code into
+	loop_exits_before_overflow.
+	(free_numbers_of_iterations_estimates_loop): Free control ivs.
+	* tree-ssa-loop-niter.h (free_loop_control_ivs): New.
+
 2015-06-02  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gimplify.c (gimplify_modify_expr): Do not create a DECL_DEBUG_EXPR if
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 1d845729e5e..b0116786d49 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -116,6 +116,14 @@ enum loop_estimation
   EST_LAST
 };
 
+/* The structure describing non-overflow control induction variable for
+   loop's exit edge.  */
+struct GTY ((chain_next ("%h.next"))) control_iv {
+  tree base;
+  tree step;
+  struct control_iv *next;
+};
+
 /* Structure to hold information for each natural loop.  */
 struct GTY ((chain_next ("%h.next"))) loop {
   /* Index into loops array.  */
@@ -203,6 +211,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
   /* Upper bound on number of iterations of a loop.  */
   struct nb_iter_bound *bounds;
 
+  /* Non-overflow control ivs of a loop.  */
+  struct control_iv *control_ivs;
+
   /* Head of the cyclic list of the exits of the loop.  */
   struct loop_exit *exits;
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9f93db005cd..39617815ba6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2015-06-02  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/48052
+	* gcc.dg/tree-ssa/scev-8.c: New.
+	* gcc.dg/tree-ssa/scev-9.c: New.
+	* gcc.dg/tree-ssa/scev-10.c: New.
+	* gcc.dg/vect/pr48052.c: New.
+
 2015-06-02  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gcc.dg/vect/vec-scal-opt.c: Adjust regexp.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
new file mode 100644
index 00000000000..fd815b2fe08
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (signed char s, signed char l)
+{
+  signed char i;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array reference is scev.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
new file mode 100644
index 00000000000..766f674d55b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo1 (long long s, long long l)
+{
+  long long i;
+
+  for (i = s; i < l; i++)
+    {
+      a[(short)i] = 0;
+    }
+  return 0;
+}
+
+int
+foo2 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i, step = 1;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[c];
+      c += step;
+    }
+
+  return sum;
+}
+
+int
+foo3 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i += c)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+int
+foo4 (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-not "use \[0-9\]\n  address" "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
new file mode 100644
index 00000000000..22e96c00d48
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i < l; i += 1)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array reference is scev.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 00000000000..7e99c5508fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 042f8dff2f1..3f6bebe63dc 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "wide-int.h"
 #include "inchash.h"
 #include "tree.h"
+#include "stor-layout.h"
 #include "fold-const.h"
 #include "calls.h"
 #include "hashtab.h"
@@ -1184,6 +1185,7 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
       niter->niter = delta;
       niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
 				     TYPE_SIGN (niter_type));
+      niter->control.no_overflow = true;
       return true;
     }
 
@@ -1965,6 +1967,9 @@ number_of_iterations_exit (struct loop *loop, edge exit,
     return false;
 
   niter->assumptions = boolean_false_node;
+  niter->control.base = NULL_TREE;
+  niter->control.step = NULL_TREE;
+  niter->control.no_overflow = false;
   last = last_stmt (exit->src);
   if (!last)
     return false;
@@ -2744,6 +2749,29 @@ record_estimate (struct loop *loop, tree bound, const widest_int &i_bound,
   record_niter_bound (loop, new_i_bound, realistic, upper);
 }
 
+/* Records the control iv analyzed in NITER for LOOP if the iv is valid
+   and doesn't overflow.  */
+
+static void
+record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
+{
+  struct control_iv *iv;
+
+  if (!niter->control.base || !niter->control.step)
+    return;
+
+  if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
+    return;
+
+  iv = ggc_alloc<control_iv> ();
+  iv->base = niter->control.base;
+  iv->step = niter->control.step;
+  iv->next = loop->control_ivs;
+  loop->control_ivs = iv;
+
+  return;
+}
+
 /* Record the estimate on number of iterations of LOOP based on the fact that
    the induction variable BASE + STEP * i evaluated in STMT does not wrap and
    its values belong to the range <LOW, HIGH>.  REALISTIC is true if the
@@ -3467,6 +3495,7 @@ estimate_numbers_of_iterations_loop (struct loop *loop)
       record_estimate (loop, niter, niter_desc.max,
 		       last_stmt (ex->src),
 		       true, ex == likely_exit, true);
+      record_control_iv (loop, &niter_desc);
     }
   exits.release ();
 
@@ -3773,6 +3802,189 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Return true if we can prove LOOP is exited before evolution of induction
+   variabled {BASE, STEP} overflows with respect to its type bound.  */
+
+static bool
+loop_exits_before_overflow (tree base, tree step,
+			    gimple at_stmt, struct loop *loop)
+{
+  widest_int niter;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
+  tree e, delta, step_abs, unsigned_base;
+  tree type = TREE_TYPE (step);
+  tree unsigned_type, valid_niter;
+
+  /* Don't issue signed overflow warnings.  */
+  fold_defer_overflow_warnings ();
+
+  /* Compute the number of iterations before we reach the bound of the
+     type, and verify that the loop is exited before this occurs.  */
+  unsigned_type = unsigned_type_for (type);
+  unsigned_base = fold_convert (unsigned_type, base);
+
+  if (tree_int_cst_sign_bit (step))
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   lower_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
+      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
+			      fold_convert (unsigned_type, step));
+    }
+  else
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   upper_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
+      step_abs = fold_convert (unsigned_type, step);
+    }
+
+  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
+
+  estimate_numbers_of_iterations_loop (loop);
+
+  if (max_loop_iterations (loop, &niter)
+      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
+      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
+			   wide_int_to_tree (TREE_TYPE (valid_niter),
+					     niter))) != NULL
+      && integer_nonzerop (e))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return true;
+    }
+  if (at_stmt)
+    for (bound = loop->bounds; bound; bound = bound->next)
+      {
+	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
+	  {
+	    fold_undefer_and_ignore_overflow_warnings ();
+	    return true;
+	  }
+      }
+  fold_undefer_and_ignore_overflow_warnings ();
+
+  /* Try to prove loop is exited before {base, step} overflows with the
+     help of analyzed loop control IV.  This is done only for IVs with
+     constant step because otherwise we don't have the information.  */
+  if (TREE_CODE (step) == INTEGER_CST)
+    for (civ = loop->control_ivs; civ; civ = civ->next)
+      {
+	enum tree_code code;
+	tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
+
+	/* Have to consider type difference because operand_equal_p ignores
+	   that for constants.  */
+	if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
+	    || element_precision (type) != element_precision (civ_type))
+	  continue;
+
+	/* Only consider control IV with same step.  */
+	if (!operand_equal_p (step, civ->step, 0))
+	  continue;
+
+	/* Done proving if this is a no-overflow control IV.  */
+	if (operand_equal_p (base, civ->base, 0))
+	  return true;
+
+	/* If this is a before stepping control IV, in other words, we have
+
+	     {civ_base, step} = {base + step, step}
+
+	   Because civ {base + step, step} doesn't overflow during loop
+	   iterations, {base, step} will not overflow if we can prove the
+	   operation "base + step" does not overflow.  Specifically, we try
+	   to prove below conditions are satisfied:
+
+	     base <= UPPER_BOUND (type) - step  ;;step > 0
+	     base >= LOWER_BOUND (type) - step  ;;step < 0
+
+	   by proving the reverse conditions are false using loop's initial
+	   condition.  */
+	stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
+	if (operand_equal_p (stepped, civ->base, 0))
+	  {
+	    if (tree_int_cst_sign_bit (step))
+	      {
+		code = LT_EXPR;
+		extreme = lower_bound_in_type (type, type);
+	      }
+	    else
+	      {
+		code = GT_EXPR;
+		extreme = upper_bound_in_type (type, type);
+	      }
+	    extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	    e = fold_build2 (code, boolean_type_node, base, extreme);
+	    e = simplify_using_initial_conditions (loop, e);
+	    if (integer_zerop (e))
+	      return true;
+
+	    continue;
+	  }
+
+	/* Similar to above, only in this case we have:
+
+	     {civ_base, step} = {(signed T)((unsigned T)base + step), step}
+	     && TREE_TYPE (civ_base) = signed T.
+
+	   We prove that below condition is satisfied:
+
+	     (signed T)((unsigned T)base + step)
+	       == (signed T)(unsigned T)base + step
+	       == base + step
+
+	   because of exact the same reason as above.  This also proves
+	   there is no overflow in the operation "base + step", thus the
+	   induction variable {base, step} during loop iterations.
+
+	   This is necessary to handle cases as below:
+
+	     int foo (int *a, signed char s, signed char l)
+	       {
+		 signed char i;
+		 for (i = s; i < l; i++)
+		   a[i] = 0;
+		 return 0;
+	       }
+
+	   The variable I is firstly converted to type unsigned char,
+	   incremented, then converted back to type signed char.  */
+	if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
+	  continue;
+	e = TREE_OPERAND (civ->base, 0);
+	if (TREE_CODE (e) != PLUS_EXPR
+	    || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
+	    || !operand_equal_p (step,
+				 fold_convert (type,
+					       TREE_OPERAND (e, 1)), 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	gcc_assert (operand_equal_p (e, base, 0));
+	if (tree_int_cst_sign_bit (step))
+	  {
+	    code = LT_EXPR;
+	    extreme = lower_bound_in_type (type, type);
+	  }
+	else
+	  {
+	    code = GT_EXPR;
+	    extreme = upper_bound_in_type (type, type);
+	  }
+	extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	e = fold_build2 (code, boolean_type_node, base, extreme);
+	e = simplify_using_initial_conditions (loop, e);
+	if (integer_zerop (e))
+	  return true;
+      }
+
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3788,13 +4000,6 @@ scev_probably_wraps_p (tree base, tree step,
 		       gimple at_stmt, struct loop *loop,
 		       bool use_overflow_semantics)
 {
-  tree delta, step_abs;
-  tree unsigned_type, valid_niter;
-  tree type = TREE_TYPE (step);
-  tree e;
-  widest_int niter;
-  struct nb_iter_bound *bound;
-
   /* FIXME: We really need something like
      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
 
@@ -3828,56 +4033,8 @@ scev_probably_wraps_p (tree base, tree step,
   if (TREE_CODE (step) != INTEGER_CST)
     return true;
 
-  /* Don't issue signed overflow warnings.  */
-  fold_defer_overflow_warnings ();
-
-  /* Otherwise, compute the number of iterations before we reach the
-     bound of the type, and verify that the loop is exited before this
-     occurs.  */
-  unsigned_type = unsigned_type_for (type);
-  base = fold_convert (unsigned_type, base);
-
-  if (tree_int_cst_sign_bit (step))
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   lower_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
-      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
-			      fold_convert (unsigned_type, step));
-    }
-  else
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   upper_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
-      step_abs = fold_convert (unsigned_type, step);
-    }
-
-  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
-
-  estimate_numbers_of_iterations_loop (loop);
-
-  if (max_loop_iterations (loop, &niter)
-      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
-      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
-			   wide_int_to_tree (TREE_TYPE (valid_niter),
-					     niter))) != NULL
-      && integer_nonzerop (e))
-    {
-      fold_undefer_and_ignore_overflow_warnings ();
-      return false;
-    }
-  if (at_stmt)
-    for (bound = loop->bounds; bound; bound = bound->next)
-      {
-	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
-	  {
-	    fold_undefer_and_ignore_overflow_warnings ();
-	    return false;
-	  }
-      }
-
-  fold_undefer_and_ignore_overflow_warnings ();
+  if (loop_exits_before_overflow (base, step, at_stmt, loop))
+    return false;
 
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
@@ -3889,17 +4046,26 @@ scev_probably_wraps_p (tree base, tree step,
 void
 free_numbers_of_iterations_estimates_loop (struct loop *loop)
 {
-  struct nb_iter_bound *bound, *next;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
 
   loop->nb_iterations = NULL;
   loop->estimate_state = EST_NOT_COMPUTED;
-  for (bound = loop->bounds; bound; bound = next)
+  for (bound = loop->bounds; bound;)
     {
-      next = bound->next;
+      struct nb_iter_bound *next = bound->next;
       ggc_free (bound);
+      bound = next;
     }
-
   loop->bounds = NULL;
+
+  for (civ = loop->control_ivs; civ;)
+    {
+      struct control_iv *next = civ->next;
+      ggc_free (civ);
+      civ = next;
+    }
+  loop->control_ivs = NULL;
 }
 
 /* Frees the information on upper bounds on numbers of iterations of loops.  */
diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
index 7134906dd80..8d4f799c090 100644
--- a/gcc/tree-ssa-loop-niter.h
+++ b/gcc/tree-ssa-loop-niter.h
@@ -41,6 +41,7 @@ extern void estimate_numbers_of_iterations (void);
 extern bool stmt_dominates_stmt_p (gimple, gimple);
 extern bool nowrap_type_p (tree);
 extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool);
+extern void free_loop_control_ivs (struct loop *);
 extern void free_numbers_of_iterations_estimates_loop (struct loop *);
 extern void free_numbers_of_iterations_estimates (void);
 extern void substitute_in_loop_info (struct loop *, tree, tree);