re PR tree-optimization/46728 (GCC does not generate fmadd for pow (x, 0.75)+y on powerpc)

2011-06-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com> PR tree-optimization/46728 * builtins.c (powi_table): Remove. (powi_lookup_cost): Remove. (powi_cost): Remove. (expand_powi_1): Remove. (expand_powi): Remove. (expand_builtin_pow_root): Remove. (expand_builtin_pow): Remove. (expand_builtin_powi): Eliminate handling of constant exponent. (expand_builtin): Use expand_builtin_mathfn_2 for BUILT_IN_POW. From-SVN: r174701
2011-06-06 14:27:41 +00:00 · 2011-06-06 14:27:41 +00:00 · 3906ea1b34
commit 3906ea1b34
parent 3598cabdae
2 changed files with 14 additions and 472 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,16 @@
+2011-06-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	PR tree-optimization/46728
+	* builtins.c (powi_table): Remove.
+	(powi_lookup_cost): Remove.
+	(powi_cost): Remove.
+	(expand_powi_1): Remove.
+	(expand_powi): Remove.
+	(expand_builtin_pow_root): Remove.
+	(expand_builtin_pow): Remove.
+	(expand_builtin_powi): Eliminate handling of constant exponent.
+	(expand_builtin): Use expand_builtin_mathfn_2 for BUILT_IN_POW.
+	
 2011-06-06  Alexandre Oliva  <aoliva@redhat.com>

 	* cprop.c (local_cprop_pass): Don't set changed for debug insns.
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@ -2823,451 +2823,6 @@ expand_builtin_int_roundingfn_2 (tree exp, rtx target)
  return target;
 }

-/* To evaluate powi(x,n), the floating point value x raised to the
-   constant integer exponent n, we use a hybrid algorithm that
-   combines the "window method" with look-up tables.  For an
-   introduction to exponentiation algorithms and "addition chains",
-   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
-   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
-   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
-   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
-
-/* Provide a default value for POWI_MAX_MULTS, the maximum number of
-   multiplications to inline before calling the system library's pow
-   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
-   so this default never requires calling pow, powf or powl.  */
-
-#ifndef POWI_MAX_MULTS
-#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
-#endif
-
-/* The size of the "optimal power tree" lookup table.  All
-   exponents less than this value are simply looked up in the
-   powi_table below.  This threshold is also used to size the
-   cache of pseudo registers that hold intermediate results.  */
-#define POWI_TABLE_SIZE 256
-
-/* The size, in bits of the window, used in the "window method"
-   exponentiation algorithm.  This is equivalent to a radix of
-   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
-#define POWI_WINDOW_SIZE 3
-
-/* The following table is an efficient representation of an
-   "optimal power tree".  For each value, i, the corresponding
-   value, j, in the table states than an optimal evaluation
-   sequence for calculating pow(x,i) can be found by evaluating
-   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
-   100 integers is given in Knuth's "Seminumerical algorithms".  */
-
-static const unsigned char powi_table[POWI_TABLE_SIZE] =
-  {
-      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
-      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
-      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
-     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
-     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
-     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
-     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
-     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
-     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
-     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
-     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
-     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
-     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
-     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
-     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
-     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
-     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
-     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
-     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
-     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
-     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
-     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
-     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
-     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
-     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
-    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
-    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
-    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
-    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
-    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
-    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
-    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
-  };
-
-
-/* Return the number of multiplications required to calculate
-   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
-   subroutine of powi_cost.  CACHE is an array indicating
-   which exponents have already been calculated.  */
-
-static int
-powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
-{
-  /* If we've already calculated this exponent, then this evaluation
-     doesn't require any additional multiplications.  */
-  if (cache[n])
-    return 0;
-
-  cache[n] = true;
-  return powi_lookup_cost (n - powi_table[n], cache)
-	 + powi_lookup_cost (powi_table[n], cache) + 1;
-}
-
-/* Return the number of multiplications required to calculate
-   powi(x,n) for an arbitrary x, given the exponent N.  This
-   function needs to be kept in sync with expand_powi below.  */
-
-static int
-powi_cost (HOST_WIDE_INT n)
-{
-  bool cache[POWI_TABLE_SIZE];
-  unsigned HOST_WIDE_INT digit;
-  unsigned HOST_WIDE_INT val;
-  int result;
-
-  if (n == 0)
-    return 0;
-
-  /* Ignore the reciprocal when calculating the cost.  */
-  val = (n < 0) ? -n : n;
-
-  /* Initialize the exponent cache.  */
-  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
-  cache[1] = true;
-
-  result = 0;
-
-  while (val >= POWI_TABLE_SIZE)
-    {
-      if (val & 1)
-	{
-	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
-	  result += powi_lookup_cost (digit, cache)
-		    + POWI_WINDOW_SIZE + 1;
-	  val >>= POWI_WINDOW_SIZE;
-	}
-      else
-	{
-	  val >>= 1;
-	  result++;
-	}
-    }
-
-  return result + powi_lookup_cost (val, cache);
-}
-
-/* Recursive subroutine of expand_powi.  This function takes the array,
-   CACHE, of already calculated exponents and an exponent N and returns
-   an RTX that corresponds to CACHE[1]**N, as calculated in mode MODE.  */
-
-static rtx
-expand_powi_1 (enum machine_mode mode, unsigned HOST_WIDE_INT n, rtx *cache)
-{
-  unsigned HOST_WIDE_INT digit;
-  rtx target, result;
-  rtx op0, op1;
-
-  if (n < POWI_TABLE_SIZE)
-    {
-      if (cache[n])
-	return cache[n];
-
-      target = gen_reg_rtx (mode);
-      cache[n] = target;
-
-      op0 = expand_powi_1 (mode, n - powi_table[n], cache);
-      op1 = expand_powi_1 (mode, powi_table[n], cache);
-    }
-  else if (n & 1)
-    {
-      target = gen_reg_rtx (mode);
-      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
-      op0 = expand_powi_1 (mode, n - digit, cache);
-      op1 = expand_powi_1 (mode, digit, cache);
-    }
-  else
-    {
-      target = gen_reg_rtx (mode);
-      op0 = expand_powi_1 (mode, n >> 1, cache);
-      op1 = op0;
-    }
-
-  result = expand_mult (mode, op0, op1, target, 0);
-  if (result != target)
-    emit_move_insn (target, result);
-  return target;
-}
-
-/* Expand the RTL to evaluate powi(x,n) in mode MODE.  X is the
-   floating point operand in mode MODE, and N is the exponent.  This
-   function needs to be kept in sync with powi_cost above.  */
-
-static rtx
-expand_powi (rtx x, enum machine_mode mode, HOST_WIDE_INT n)
-{
-  rtx cache[POWI_TABLE_SIZE];
-  rtx result;
-
-  if (n == 0)
-    return CONST1_RTX (mode);
-
-  memset (cache, 0, sizeof (cache));
-  cache[1] = x;
-
-  result = expand_powi_1 (mode, (n < 0) ? -n : n, cache);
-
-  /* If the original exponent was negative, reciprocate the result.  */
-  if (n < 0)
-    result = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
-			   result, NULL_RTX, 0, OPTAB_LIB_WIDEN);
-
-  return result;
-}
-
-/* Fold a builtin function call to pow, powf, or powl into a series of sqrts or
-   cbrts.  Return NULL_RTX if no simplification can be made or expand the tree
-   if we can simplify it.  */
-static rtx
-expand_builtin_pow_root (location_t loc, tree arg0, tree arg1, tree type,
-			 rtx subtarget)
-{
-  if (TREE_CODE (arg1) == REAL_CST
-      && !TREE_OVERFLOW (arg1)
-      && flag_unsafe_math_optimizations)
-    {
-      enum machine_mode mode = TYPE_MODE (type);
-      tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
-      tree cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
-      REAL_VALUE_TYPE c = TREE_REAL_CST (arg1);
-      tree op = NULL_TREE;
-
-      if (sqrtfn)
-	{
-	  /* Optimize pow (x, 0.5) into sqrt.  */
-	  if (REAL_VALUES_EQUAL (c, dconsthalf))
-	    op = build_call_nofold_loc (loc, sqrtfn, 1, arg0);
-
-	  /* Don't do this optimization if we don't have a sqrt insn.  */
-	  else if (optab_handler (sqrt_optab, mode) != CODE_FOR_nothing)
-	    {
-	      REAL_VALUE_TYPE dconst1_4 = dconst1;
-	      REAL_VALUE_TYPE dconst3_4;
-	      SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
-
-	      real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
-	      SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
-
-	      /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
-		 machines that a builtin sqrt instruction is smaller than a
-		 call to pow with 0.25, so do this optimization even if
-		 -Os.  */
-	      if (REAL_VALUES_EQUAL (c, dconst1_4))
-		{
-		  op = build_call_nofold_loc (loc, sqrtfn, 1, arg0);
-		  op = build_call_nofold_loc (loc, sqrtfn, 1, op);
-		}
-
-	      /* Optimize pow (x, 0.75) = sqrt (x) * sqrt (sqrt (x)) unless we
-		 are optimizing for space.  */
-	      else if (optimize_insn_for_speed_p ()
-		       && !TREE_SIDE_EFFECTS (arg0)
-		       && REAL_VALUES_EQUAL (c, dconst3_4))
-		{
-		  tree sqrt1 = build_call_expr_loc (loc, sqrtfn, 1, arg0);
-		  tree sqrt2 = builtin_save_expr (sqrt1);
-		  tree sqrt3 = build_call_expr_loc (loc, sqrtfn, 1, sqrt1);
-		  op = fold_build2_loc (loc, MULT_EXPR, type, sqrt2, sqrt3);
-		}
-	    }
-	}
-
-      /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
-	 cbrt/sqrts instead of pow (x, 1./6.).  */
-      if (cbrtfn && ! op
-	  && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))
-	{
-	  /* First try 1/3.  */
-	  REAL_VALUE_TYPE dconst1_3
-	    = real_value_truncate (mode, dconst_third ());
-
-	  if (REAL_VALUES_EQUAL (c, dconst1_3))
-	    op = build_call_nofold_loc (loc, cbrtfn, 1, arg0);
-
-	      /* Now try 1/6.  */
-	  else if (optimize_insn_for_speed_p ()
-		   && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing)
-	    {
-	      REAL_VALUE_TYPE dconst1_6 = dconst1_3;
-	      SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
-
-	      if (REAL_VALUES_EQUAL (c, dconst1_6))
-		{
-		  op = build_call_nofold_loc (loc, sqrtfn, 1, arg0);
-		  op = build_call_nofold_loc (loc, cbrtfn, 1, op);
-		}
-	    }
-	}
-
-      if (op)
-	return expand_expr (op, subtarget, mode, EXPAND_NORMAL);
-    }
-
-  return NULL_RTX;
-}
-
-/* Expand a call to the pow built-in mathematical function.  Return NULL_RTX if
-   a normal call should be emitted rather than expanding the function
-   in-line.  EXP is the expression that is a call to the builtin
-   function; if convenient, the result should be placed in TARGET.  */
-
-static rtx
-expand_builtin_pow (tree exp, rtx target, rtx subtarget)
-{
-  tree arg0, arg1;
-  tree fn, narg0;
-  tree type = TREE_TYPE (exp);
-  REAL_VALUE_TYPE cint, c, c2;
-  HOST_WIDE_INT n;
-  rtx op, op2;
-  enum machine_mode mode = TYPE_MODE (type);
-
-  if (! validate_arglist (exp, REAL_TYPE, REAL_TYPE, VOID_TYPE))
-    return NULL_RTX;
-
-  arg0 = CALL_EXPR_ARG (exp, 0);
-  arg1 = CALL_EXPR_ARG (exp, 1);
-
-  if (TREE_CODE (arg1) != REAL_CST
-      || TREE_OVERFLOW (arg1))
-    return expand_builtin_mathfn_2 (exp, target, subtarget);
-
-  /* Handle constant exponents.  */
-
-  /* For integer valued exponents we can expand to an optimal multiplication
-     sequence using expand_powi.  */
-  c = TREE_REAL_CST (arg1);
-  n = real_to_integer (&c);
-  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
-  if (real_identical (&c, &cint)
-      && ((n >= -1 && n <= 2)
-	  || (flag_unsafe_math_optimizations
-	      && optimize_insn_for_speed_p ()
-	      && powi_cost (n) <= POWI_MAX_MULTS)))
-    {
-      op = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL);
-      if (n != 1)
-	{
-	  op = force_reg (mode, op);
-	  op = expand_powi (op, mode, n);
-	}
-      return op;
-    }
-
-  narg0 = builtin_save_expr (arg0);
-
-  /* If the exponent is not integer valued, check if it is half of an integer.
-     In this case we can expand to sqrt (x) * x**(n/2).  */
-  fn = mathfn_built_in (type, BUILT_IN_SQRT);
-  if (fn != NULL_TREE)
-    {
-      real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
-      n = real_to_integer (&c2);
-      real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
-      if (real_identical (&c2, &cint)
-	  && ((flag_unsafe_math_optimizations
-	       && optimize_insn_for_speed_p ()
-	       && powi_cost (n/2) <= POWI_MAX_MULTS)
-	      /* Even the c == 0.5 case cannot be done unconditionally
-	         when we need to preserve signed zeros, as
-		 pow (-0, 0.5) is +0, while sqrt(-0) is -0.  */
-	      || (!HONOR_SIGNED_ZEROS (mode) && n == 1)
-	      /* For c == 1.5 we can assume that x * sqrt (x) is always
-	         smaller than pow (x, 1.5) if sqrt will not be expanded
-		 as a call.  */
-	      || (n == 3
-		  && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing)))
-	{
-	  tree call_expr = build_call_nofold_loc (EXPR_LOCATION (exp), fn, 1,
-						  narg0);
-	  /* Use expand_expr in case the newly built call expression
-	     was folded to a non-call.  */
-	  op = expand_expr (call_expr, subtarget, mode, EXPAND_NORMAL);
-	  if (n != 1)
-	    {
-	      op2 = expand_expr (narg0, subtarget, VOIDmode, EXPAND_NORMAL);
-	      op2 = force_reg (mode, op2);
-	      op2 = expand_powi (op2, mode, abs (n / 2));
-	      op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX,
-					0, OPTAB_LIB_WIDEN);
-	      /* If the original exponent was negative, reciprocate the
-		 result.  */
-	      if (n < 0)
-		op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
-				   op, NULL_RTX, 0, OPTAB_LIB_WIDEN);
-	    }
-	  return op;
-	}
-    }
-
-  /* Check whether we can do a series of sqrt or cbrt's instead of the pow
-     call.  */
-  op = expand_builtin_pow_root (EXPR_LOCATION (exp), arg0, arg1, type,
-				subtarget);
-  if (op)
-    return op;
-
-  /* Try if the exponent is a third of an integer.  In this case
-     we can expand to x**(n/3) * cbrt(x)**(n%3).  As cbrt (x) is
-     different from pow (x, 1./3.) due to rounding and behavior
-     with negative x we need to constrain this transformation to
-     unsafe math and positive x or finite math.  */
-  fn = mathfn_built_in (type, BUILT_IN_CBRT);
-  if (fn != NULL_TREE
-      && flag_unsafe_math_optimizations
-      && (tree_expr_nonnegative_p (arg0)
-	  || !HONOR_NANS (mode)))
-    {
-      REAL_VALUE_TYPE dconst3;
-      real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
-      real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
-      real_round (&c2, mode, &c2);
-      n = real_to_integer (&c2);
-      real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
-      real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
-      real_convert (&c2, mode, &c2);
-      if (real_identical (&c2, &c)
-	  && ((optimize_insn_for_speed_p ()
-	       && powi_cost (n/3) <= POWI_MAX_MULTS)
-	      || n == 1))
-	{
-	  tree call_expr = build_call_nofold_loc (EXPR_LOCATION (exp), fn, 1,
-						  narg0);
-	  op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0);
-	  if (abs (n) % 3 == 2)
-	    op = expand_simple_binop (mode, MULT, op, op, op,
-				      0, OPTAB_LIB_WIDEN);
-	  if (n != 1)
-	    {
-	      op2 = expand_expr (narg0, subtarget, VOIDmode, EXPAND_NORMAL);
-	      op2 = force_reg (mode, op2);
-	      op2 = expand_powi (op2, mode, abs (n / 3));
-	      op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX,
-					0, OPTAB_LIB_WIDEN);
-	      /* If the original exponent was negative, reciprocate the
-		 result.  */
-	      if (n < 0)
-		op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
-				   op, NULL_RTX, 0, OPTAB_LIB_WIDEN);
-	    }
-	  return op;
-	}
-    }
-
-  /* Fall back to optab expansion.  */
-  return expand_builtin_mathfn_2 (exp, target, subtarget);
-}
-
 /* Expand a call to the powi built-in mathematical function.  Return NULL_RTX if
   a normal call should be emitted rather than expanding the function
   in-line.  EXP is the expression that is a call to the builtin
@ -3288,27 +2843,6 @@ expand_builtin_powi (tree exp, rtx target)
  arg1 = CALL_EXPR_ARG (exp, 1);
  mode = TYPE_MODE (TREE_TYPE (exp));

-  /* Handle constant power.  */
-
-  if (TREE_CODE (arg1) == INTEGER_CST
-      && !TREE_OVERFLOW (arg1))
-    {
-      HOST_WIDE_INT n = TREE_INT_CST_LOW (arg1);
-
-      /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact.
-	 Otherwise, check the number of multiplications required.  */
-      if ((TREE_INT_CST_HIGH (arg1) == 0
-	   || TREE_INT_CST_HIGH (arg1) == -1)
-	  && ((n >= -1 && n <= 2)
-	      || (optimize_insn_for_speed_p ()
-		  && powi_cost (n) <= POWI_MAX_MULTS)))
-	{
-	  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-	  op0 = force_reg (mode, op0);
-	  return expand_powi (op0, mode, n);
-	}
-    }
-
  /* Emit a libcall to libgcc.  */

  /* Mode of the 2nd argument must match that of an int.  */
@ -5862,12 +5396,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
 	return target;
      break;

-    CASE_FLT_FN (BUILT_IN_POW):
-      target = expand_builtin_pow (exp, target, subtarget);
-      if (target)
-	return target;
-      break;
-
    CASE_FLT_FN (BUILT_IN_POWI):
      target = expand_builtin_powi (exp, target);
      if (target)
@ -5885,6 +5413,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
    CASE_FLT_FN (BUILT_IN_FMOD):
    CASE_FLT_FN (BUILT_IN_REMAINDER):
    CASE_FLT_FN (BUILT_IN_DREM):
+    CASE_FLT_FN (BUILT_IN_POW):
      target = expand_builtin_mathfn_2 (exp, target, subtarget);
      if (target)
 	return target;