From 16ac9369b0741666f82391db187a5536e16d3638 Mon Sep 17 00:00:00 2001
From: Bin Cheng <bin.cheng@arm.com>
Date: Tue, 25 Jul 2017 08:56:26 +0000
Subject: [PATCH] backport: re PR tree-optimization/81388 (Incorrect code
 generation with -O1)

	Backport from 2017-07-20 trunk r250384.

	PR tree-optimization/81388
	Revert r238585:
	2016-07-21  Bin Cheng  <bin.cheng@arm.com>

	* tree-ssa-loop-niter.c (number_of_iterations_lt_to_ne): Clean up
	by removing computation of may_be_zero.

	gcc/testsuite
	PR tree-optimization/81388
	* gcc.dg/tree-ssa/pr81388-1.c: New test.
	* gcc.dg/tree-ssa/pr81388-2.c: New test.

From-SVN: r250497
---
 gcc/ChangeLog                             |  11 +++
 gcc/testsuite/ChangeLog                   |   8 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr81388-1.c |  14 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c |  14 +++
 gcc/tree-ssa-loop-niter.c                 | 105 ++++++++++++++++------
 5 files changed, 124 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr81388-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9f6497945b4..271a32452ea 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2017-07-25  Bin Cheng  <bin.cheng@arm.com>
+
+	Backport from 2017-07-20 trunk r250384.
+
+	PR tree-optimization/81388
+	Revert r238585:
+	2016-07-21  Bin Cheng  <bin.cheng@arm.com>
+
+	* tree-ssa-loop-niter.c (number_of_iterations_lt_to_ne): Clean up
+	by removing computation of may_be_zero.
+
 2017-07-23  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/80569
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6f3d00b0bc1..71603a744f3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2017-07-25  Bin Cheng  <bin.cheng@arm.com>
+
+	Backport from 2017-07-20 trunk r250384.
+
+	PR tree-optimization/81388
+	* gcc.dg/tree-ssa/pr81388-1.c: New test.
+	* gcc.dg/tree-ssa/pr81388-2.c: New test.
+
 2017-07-23  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/80569
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr81388-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr81388-1.c
new file mode 100644
index 00000000000..85c00e56144
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr81388-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-strict-overflow -fdump-tree-ivcanon-details" } */
+
+void bar();
+void foo(char *dst)
+{
+  char *const end = dst;
+  do {
+    bar();
+    dst += 2;
+  } while (dst < end);
+}
+
+/* { dg-final { scan-tree-dump " zero if " "ivcanon" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c
new file mode 100644
index 00000000000..aa40588f358
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivcanon-details" } */
+
+void bar();
+void foo(unsigned dst)
+{
+  unsigned end = dst;
+  do {
+    bar();
+    dst += 2;
+  } while (dst < end);
+}
+
+/* { dg-final { scan-tree-dump " zero if " "ivcanon" } } */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index e67cd930946..6b829cc4d79 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1142,8 +1142,12 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
   tree niter_type = TREE_TYPE (step);
   tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
   tree tmod;
-  tree assumption = boolean_true_node, bound;
-  tree type1 = (POINTER_TYPE_P (type)) ? sizetype : type;
+  mpz_t mmod;
+  tree assumption = boolean_true_node, bound, noloop;
+  bool ret = false, fv_comp_no_overflow;
+  tree type1 = type;
+  if (POINTER_TYPE_P (type))
+    type1 = sizetype;
 
   if (TREE_CODE (mod) != INTEGER_CST)
     return false;
@@ -1151,51 +1155,96 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
     mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
   tmod = fold_convert (type1, mod);
 
+  mpz_init (mmod);
+  wi::to_mpz (mod, mmod, UNSIGNED);
+  mpz_neg (mmod, mmod);
+
   /* If the induction variable does not overflow and the exit is taken,
-     then the computation of the final value does not overflow.  There
-     are three cases:
-       1) The case if the new final value is equal to the current one.
-       2) Induction varaible has pointer type, as the code cannot rely
-	  on the object to that the pointer points being placed at the
-	  end of the address space (and more pragmatically,
-	  TYPE_{MIN,MAX}_VALUE is not defined for pointers).
-       3) EXIT_MUST_BE_TAKEN is true, note it implies that the induction
-	  variable does not overflow.  */
-  if (!integer_zerop (mod) && !POINTER_TYPE_P (type) && !exit_must_be_taken)
+     then the computation of the final value does not overflow.  This is
+     also obviously the case if the new final value is equal to the
+     current one.  Finally, we postulate this for pointer type variables,
+     as the code cannot rely on the object to which the pointer points being
+     placed at the end of the address space (and more pragmatically,
+     TYPE_{MIN,MAX}_VALUE is not defined for pointers).  */
+  if (integer_zerop (mod) || POINTER_TYPE_P (type))
+    fv_comp_no_overflow = true;
+  else if (!exit_must_be_taken)
+    fv_comp_no_overflow = false;
+  else
+    fv_comp_no_overflow =
+	    (iv0->no_overflow && integer_nonzerop (iv0->step))
+	    || (iv1->no_overflow && integer_nonzerop (iv1->step));
+
+  if (integer_nonzerop (iv0->step))
     {
-      if (integer_nonzerop (iv0->step))
+      /* The final value of the iv is iv1->base + MOD, assuming that this
+	 computation does not overflow, and that
+	 iv0->base <= iv1->base + MOD.  */
+      if (!fv_comp_no_overflow)
 	{
-	  /* The final value of the iv is iv1->base + MOD, assuming
-	     that this computation does not overflow, and that
-	     iv0->base <= iv1->base + MOD.  */
 	  bound = fold_build2 (MINUS_EXPR, type1,
 			       TYPE_MAX_VALUE (type1), tmod);
 	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
 				    iv1->base, bound);
+	  if (integer_zerop (assumption))
+	    goto end;
 	}
+      if (mpz_cmp (mmod, bnds->below) < 0)
+	noloop = boolean_false_node;
+      else if (POINTER_TYPE_P (type))
+	noloop = fold_build2 (GT_EXPR, boolean_type_node,
+			      iv0->base,
+			      fold_build_pointer_plus (iv1->base, tmod));
       else
+	noloop = fold_build2 (GT_EXPR, boolean_type_node,
+			      iv0->base,
+			      fold_build2 (PLUS_EXPR, type1,
+					   iv1->base, tmod));
+    }
+  else
+    {
+      /* The final value of the iv is iv0->base - MOD, assuming that this
+	 computation does not overflow, and that
+	 iv0->base - MOD <= iv1->base.  */
+      if (!fv_comp_no_overflow)
 	{
-	  /* The final value of the iv is iv0->base - MOD, assuming
-	     that this computation does not overflow, and that
-	     iv0->base - MOD <= iv1->base.  */
 	  bound = fold_build2 (PLUS_EXPR, type1,
 			       TYPE_MIN_VALUE (type1), tmod);
 	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
 				    iv0->base, bound);
+	  if (integer_zerop (assumption))
+	    goto end;
 	}
-      if (integer_zerop (assumption))
-	return false;
-      else if (!integer_nonzerop (assumption))
-	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
-					  niter->assumptions, assumption);
+      if (mpz_cmp (mmod, bnds->below) < 0)
+	noloop = boolean_false_node;
+      else if (POINTER_TYPE_P (type))
+	noloop = fold_build2 (GT_EXPR, boolean_type_node,
+			      fold_build_pointer_plus (iv0->base,
+						       fold_build1 (NEGATE_EXPR,
+								    type1, tmod)),
+			      iv1->base);
+      else
+	noloop = fold_build2 (GT_EXPR, boolean_type_node,
+			      fold_build2 (MINUS_EXPR, type1,
+					   iv0->base, tmod),
+			      iv1->base);
     }
 
-  /* Since we are transforming LT to NE and DELTA is constant, there
-     is no need to compute may_be_zero because this loop must roll.  */
-
+  if (!integer_nonzerop (assumption))
+    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+				      niter->assumptions,
+				      assumption);
+  if (!integer_zerop (noloop))
+    niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+				      niter->may_be_zero,
+				      noloop);
   bounds_add (bnds, wi::to_widest (mod), type);
   *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);
-  return true;
+
+  ret = true;
+end:
+  mpz_clear (mmod);
+  return ret;
 }
 
 /* Add assertions to NITER that ensure that the control variable of the loop