re PR rtl-optimization/44214 (Compiler does not optimize vector divide with -freciprocal-math (or -ffast-math))

gcc: 2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> PR rtl-optimization/44214 * fold-const.c (exact_inverse): New function. (fold_binary_loc): Fold vector and complex division by constant into multiply by reciprocal with flag_reciprocal_math; fold vector division by constant into multiply by reciprocal with exact inverse. gcc/testsuite: 2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> PR rtl-optimization/44214 * gcc.dg/pr44214-1.c: New test. * gcc.dg/pr44214-2.c: Likewise. * gcc.dg/pr44214-3.c: Likewise. From-SVN: r186625
2012-04-20 14:19:13 +00:00 · 2012-04-20 14:19:13 +00:00 · add6207a86
parent ead84f73b0
commit add6207a86
6 changed files with 122 additions and 13 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,11 @@
+2012-04-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	PR rtl-optimization/44214
+	* fold-const.c (exact_inverse): New function.
+	(fold_binary_loc): Fold vector and complex division by constant into
+	multiply by reciprocal with flag_reciprocal_math; fold vector division
+	by constant into multiply by reciprocal with exact inverse.
+
 2012-04-20  Jan Hubicka  <jh@suse.cz>

 	* lto-symtab.c (lto_cgraph_replace_node): Merge needed instead of force flags.
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc, tree type,
  return NULL_TREE;
 }

+/* Return the exact inverse of CST, a REAL_CST or real VECTOR_CST of type
+   TYPE, or NULL_TREE if CST (or any vector element) has no exact inverse.  */
+
+static tree
+exact_inverse (tree type, tree cst)
+{
+  REAL_VALUE_TYPE r;  /* Input value; holds the inverse on success.  */
+  tree unit_type, *elts;
+  enum machine_mode mode;
+  unsigned vec_nelts, i;
+
+  switch (TREE_CODE (cst))
+    {
+    case REAL_CST:
+      r = TREE_REAL_CST (cst);
+
+      if (exact_real_inverse (TYPE_MODE (type), &r))
+	return build_real (type, r);
+
+      return NULL_TREE;
+
+    case VECTOR_CST:
+      vec_nelts = VECTOR_CST_NELTS (cst);
+      elts = XALLOCAVEC (tree, vec_nelts);
+      unit_type = TREE_TYPE (type);  /* Type of each element.  */
+      mode = TYPE_MODE (unit_type);
+
+      for (i = 0; i < vec_nelts; i++)
+	{
+	  r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
+	  if (!exact_real_inverse (mode, &r))
+	    return NULL_TREE;  /* One failure rejects the vector.  */
+	  elts[i] = build_real (unit_type, r);
+	}
+
+      return build_vector (type, elts);
+
+    default:
+      return NULL_TREE;  /* Other node kinds are unsupported.  */
+    }
+}
+
 /* Fold a binary expression of code CODE and type TYPE with operands
   OP0 and OP1.  LOC is the location of the resulting expression.
   Return the folded expression if folding is successful.  Otherwise,
@ -11734,23 +11776,24 @@ fold_binary_loc (location_t loc,
 	 so only do this if -freciprocal-math.  We can actually
 	 always safely do it if ARG1 is a power of two, but it's hard to
 	 tell if it is or not in a portable manner.  */
-      if (TREE_CODE (arg1) == REAL_CST)
+      if (optimize
+	  && (TREE_CODE (arg1) == REAL_CST
+	      || (TREE_CODE (arg1) == COMPLEX_CST
+		  && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
+	      || (TREE_CODE (arg1) == VECTOR_CST
+		  && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1)))))
 	{
 	  if (flag_reciprocal_math
-	      && 0 != (tem = const_binop (code, build_real (type, dconst1),
-					  arg1)))
+	      && 0 != (tem = const_binop (code, build_one_cst (type), arg1)))
 	    return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
-	  /* Find the reciprocal if optimizing and the result is exact.  */
-	  if (optimize)
+	  /* Find the reciprocal if optimizing and the result is exact.
+	     TODO: Complex reciprocal not implemented.  */
+	  if (TREE_CODE (arg1) != COMPLEX_CST)
 	    {
-	      REAL_VALUE_TYPE r;
-	      r = TREE_REAL_CST (arg1);
-	      if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
-		{
-		  tem = build_real (type, r);
-		  return fold_build2_loc (loc, MULT_EXPR, type,
-				      fold_convert_loc (loc, type, arg0), tem);
-		}
+	      tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
+
+	      if (inverse)
+		return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
 	    }
 	}
      /* Convert A/B/C to A/(B*C).  */
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,10 @@
+2012-04-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	PR rtl-optimization/44214
+	* gcc.dg/pr44214-1.c: New test.
+	* gcc.dg/pr44214-2.c: Likewise.
+	* gcc.dg/pr44214-3.c: Likewise.
+
 2012-04-20  Richard Guenther  <rguenther@suse.de>

 	* g++.dg/torture/20120420-1.C: New testcase.
--- a/gcc/testsuite/gcc.dg/pr44214-1.c
+++ b/gcc/testsuite/gcc.dg/pr44214-1.c
@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -freciprocal-math -fdump-tree-ccp1" } */
+
+typedef double v2df __attribute__ ((vector_size (16)));
+
+void do_div (v2df *a, v2df *b)
+{
+  *a = *b / (v2df) { 2.0, 3.0 };  /* 1/3.0 is inexact in binary FP.  */
+}
+
+/* Constant folding should multiply *b by the reciprocals of the
+   vector elements.  The fold does not take place for generic
+   vectors until the first CCP pass.  The string " * " occurs 3
+   times:  one multiply and two indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */
--- a/gcc/testsuite/gcc.dg/pr44214-2.c
+++ b/gcc/testsuite/gcc.dg/pr44214-2.c
@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -freciprocal-math -fdump-tree-original" } */
+
+void do_div (_Complex double *a, _Complex double *b)
+{
+  *a = *b / (4.0 - 5.0fi);  /* Complex constant divisor.  */
+}
+
+/* Constant folding should multiply *b by the reciprocal of 4 - 5i
+   = 4/41 + (5/41)i.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 1 "original" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "original" } } */
+/* { dg-final { cleanup-tree-dump "original" } } */
--- a/gcc/testsuite/gcc.dg/pr44214-3.c
+++ b/gcc/testsuite/gcc.dg/pr44214-3.c
@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ccp1" } */
+
+typedef double v2df __attribute__ ((vector_size (16)));
+
+void do_div (v2df *a, v2df *b)
+{
+  *a = *b / (v2df) { 2.0, 2.0 };  /* 1/2.0 is exactly 0.5.  */
+}
+
+/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
+   by the reciprocals of the vector elements.  As a result there should be
+   one vector multiply and zero divides in the optimized code.  The fold
+   does not take place for generic vectors until the first CCP pass.  The
+   string " * " occurs 3 times:  one multiply and two indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */