Simplify X * C1 == C2 with wrapping overflow

Odd numbers are invertible in Z / 2^n Z, so X * C1 == C2 can be rewritten
as X == C2 * inv(C1) when overflow wraps.
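For illustration only (not part of the commit), a small standalone C check of that claim in Z / 2^8 Z: for every odd C1, multiplication by C1 is a bijection, so C1*x == C2 holds for exactly one x, namely inv(C1)*C2.

/* Illustration only: exhaustively verify the rewrite in Z / 2^8 Z.
   For each odd c1, multiplication by c1 is a bijection on uint8_t,
   so c1*x == c2 holds exactly when x == inv(c1)*c2.  */
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  for (unsigned c1 = 1; c1 < 256; c1 += 2)	/* every odd c1 */
    {
      unsigned inv = 0;				/* find inv(c1) by search */
      for (unsigned x = 0; x < 256; x++)
	if ((uint8_t) (c1 * x) == 1)
	  inv = x;
      for (unsigned c2 = 0; c2 < 256; c2++)
	for (unsigned x = 0; x < 256; x++)
	  assert (((uint8_t) (c1 * x) == c2)
		  == (x == (uint8_t) (inv * c2)));
    }
  return 0;
}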

mod_inv should probably be updated to better match the other wide_int
functions, but that's a separate issue.

2020-08-10  Marc Glisse  <marc.glisse@inria.fr>

	PR tree-optimization/95433
	* match.pd (X * C1 == C2): Handle wrapping overflow.
	* expr.c (maybe_optimize_mod_cmp): Qualify call to mod_inv.
	(mod_inv): Move...
	* wide-int.cc (mod_inv): ... here.
	* wide-int.h (mod_inv): Declare it.

	* gcc.dg/tree-ssa/pr95433-2.c: New file.
Marc Glisse 2020-08-10 12:50:42 +02:00
parent 9939be5758
commit 287522613d
5 changed files with 68 additions and 35 deletions

gcc/expr.c

@@ -11859,38 +11859,6 @@ string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *decl)
return init;
}
-/* Compute the modular multiplicative inverse of A modulo M
-   using extended Euclid's algorithm.  Assumes A and M are coprime.  */
-static wide_int
-mod_inv (const wide_int &a, const wide_int &b)
-{
-  /* Verify the assumption.  */
-  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
-  unsigned int p = a.get_precision () + 1;
-  gcc_checking_assert (b.get_precision () + 1 == p);
-  wide_int c = wide_int::from (a, p, UNSIGNED);
-  wide_int d = wide_int::from (b, p, UNSIGNED);
-  wide_int x0 = wide_int::from (0, p, UNSIGNED);
-  wide_int x1 = wide_int::from (1, p, UNSIGNED);
-  if (wi::eq_p (b, 1))
-    return wide_int::from (1, p, UNSIGNED);
-  while (wi::gt_p (c, 1, UNSIGNED))
-    {
-      wide_int t = d;
-      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
-      c = t;
-      wide_int s = x0;
-      x0 = wi::sub (x1, wi::mul (q, x0));
-      x1 = s;
-    }
-  if (wi::lt_p (x1, 0, SIGNED))
-    x1 += d;
-  return x1;
-}
/* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2
is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)):
for C2 > 0 to x & C3 == C2
@@ -12101,7 +12069,7 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
w = wi::lrshift (w, shift);
wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
-  wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
+  wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
tree c3 = wide_int_to_tree (type, m);
tree c5 = NULL_TREE;
wide_int d, e;

gcc/match.pd

@@ -3828,7 +3828,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cmp @0 @2))))))
/* For integral types with undefined overflow fold
-   x * C1 == C2 into x == C2 / C1 or false.  */
+   x * C1 == C2 into x == C2 / C1 or false.
+   If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring
+   Z / 2^n Z.  */
(for cmp (eq ne)
(simplify
(cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2)
@@ -3839,7 +3841,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1),
TYPE_SIGN (TREE_TYPE (@0)), &quot))
(cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); })
-      { constant_boolean_node (cmp == NE_EXPR, type); })))))
+      { constant_boolean_node (cmp == NE_EXPR, type); }))
+   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+        && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+        && (wi::bit_and (wi::to_wide (@1), 1) == 1))
+    (cmp @0
+     {
+       tree itype = TREE_TYPE (@0);
+       int p = TYPE_PRECISION (itype);
+       wide_int m = wi::one (p + 1) << p;
+       wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED);
+       wide_int i = wide_int::from (wi::mod_inv (a, m),
+                                    p, TYPE_SIGN (itype));
+       wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2)));
+     })))))
/* Simplify comparison of something with itself. For IEEE
floating-point, we can only do some of these simplifications. */
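As a rough sketch of what the new pattern computes (illustration only, with uint32_t standing in for a precision-32 wide_int and a made-up helper name inv_mod_2_32): the replacement constant is inv(C1) * C2 in Z / 2^32 Z. The inverse below is obtained by Newton/Hensel lifting rather than the extended-Euclid routine the patch adds to wide-int.cc.

/* Illustration only: compute the folded constant for x * c1 == c2
   with 32-bit wrapping arithmetic and odd c1.  */
#include <assert.h>
#include <stdint.h>

static uint32_t
inv_mod_2_32 (uint32_t a)		/* A must be odd.  */
{
  uint32_t x = a;			/* a*a == 1 (mod 8): 3 good bits */
  for (int i = 0; i < 4; i++)		/* 3 -> 6 -> 12 -> 24 -> 48 bits */
    x *= 2 - a * x;
  return x;
}

int
main (void)
{
  /* 7*x == 3 (wrapping) folds to x == inv(7) * 3.  */
  uint32_t c3 = inv_mod_2_32 (7) * 3;
  assert (c3 == 613566757);		/* the constant in the new testcase */
  assert ((uint32_t) (7 * c3) == 3);
  return 0;
}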

gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c

@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */
+typedef __INT32_TYPE__ int32_t;
+typedef unsigned __INT32_TYPE__ uint32_t;
+int e(int32_t x){return 3*x==5;}
+int f(int32_t x){return 3*x==-5;}
+int g(int32_t x){return -3*x==5;}
+int h(int32_t x){return 7*x==3;}
+int i(uint32_t x){return 7*x==3;}
+/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */
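For reference, the expected constants are the modular inverses at work: inv(3) = 0xAAAAAAAB and inv(7) = 0xB6DB6DB7 in Z / 2^32 Z. So e folds to x == 5*inv(3) == 1431655767, f and g both fold to x == -1431655767 (inv(-3) is -inv(3)), and h and i both fold to x == 3*inv(7) == 613566757, which is why the last two scans expect two matches each.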

gcc/wide-int.cc

@@ -2223,6 +2223,39 @@ wi::round_up_for_mask (const wide_int &val, const wide_int &mask)
return (val | tmp) & -tmp;
}
+/* Compute the modular multiplicative inverse of A modulo B
+   using extended Euclid's algorithm.  Assumes A and B are coprime,
+   and that A and B have the same precision.  */
+wide_int
+wi::mod_inv (const wide_int &a, const wide_int &b)
+{
+  /* Verify the assumption.  */
+  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
+  unsigned int p = a.get_precision () + 1;
+  gcc_checking_assert (b.get_precision () + 1 == p);
+  wide_int c = wide_int::from (a, p, UNSIGNED);
+  wide_int d = wide_int::from (b, p, UNSIGNED);
+  wide_int x0 = wide_int::from (0, p, UNSIGNED);
+  wide_int x1 = wide_int::from (1, p, UNSIGNED);
+  if (wi::eq_p (b, 1))
+    return wide_int::from (1, p, UNSIGNED);
+  while (wi::gt_p (c, 1, UNSIGNED))
+    {
+      wide_int t = d;
+      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
+      c = t;
+      wide_int s = x0;
+      x0 = wi::sub (x1, wi::mul (q, x0));
+      x1 = s;
+    }
+  if (wi::lt_p (x1, 0, SIGNED))
+    x1 += d;
+  return x1;
+}
/*
* Private utilities.
*/
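For readers without the wide_int API at hand, a rough plain-C transliteration of the routine above (illustration only; mod_inv_u64 is a made-up name), using uint64_t in place of the (p+1)-bit wide_int values and exercising it with b = 2^32:

#include <assert.h>
#include <stdint.h>

/* Same extended-Euclid scheme as wi::mod_inv above.  A and B must be
   coprime.  Intermediate Bezout coefficients may go "negative" and wrap;
   the caller's truncation to the target precision makes the result
   correct either way, as in the wide_int version.  */
static uint64_t
mod_inv_u64 (uint64_t a, uint64_t b)
{
  uint64_t c = a, d = b, x0 = 0, x1 = 1;
  if (b == 1)
    return 1;
  while (c > 1)
    {
      uint64_t t = d;
      uint64_t q = c / d;	/* divmod_trunc: quotient ...  */
      d = c % d;		/* ... and remainder (done in one call there) */
      c = t;
      uint64_t s = x0;
      x0 = x1 - q * x0;
      x1 = s;
    }
  if ((int64_t) x1 < 0)		/* wi::lt_p (x1, 0, SIGNED) */
    x1 += d;
  return x1;
}

int
main (void)
{
  uint64_t m = (uint64_t) 1 << 32;		/* modulus 2^32 */
  /* Truncate to 32 bits, as the callers in expr.c and match.pd do.  */
  uint32_t inv7 = (uint32_t) mod_inv_u64 (7, m);
  assert ((uint32_t) (7 * inv7) == 1);
  assert ((uint32_t) (3 * inv7) == 613566757);	/* the 7*x == 3 testcase */
  return 0;
}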

gcc/wide-int.h

@@ -3389,6 +3389,8 @@ namespace wi
wide_int round_down_for_mask (const wide_int &, const wide_int &);
wide_int round_up_for_mask (const wide_int &, const wide_int &);
+  wide_int mod_inv (const wide_int &a, const wide_int &b);
template <typename T>
T mask (unsigned int, bool);