re PR tree-optimization/46722 (Missed fma for x*x + y)

2010-11-30  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/46722
	* tree-ssa-math-opts.c (convert_mult_to_fma): Get multiplication
	operands as arguments.
	(execute_optimize_widening_mul): Also handle power of two as
	multiplication.

	* gcc.target/i386/fma4-fma-2.c: New testcase.

From-SVN: r167304
Author: Richard Guenther, 2010-11-30 16:55:35 +00:00 (committed by Richard Biener)
Commit: 4dbed5f6d4 (parent: 16187791e3)
4 changed files with 123 additions and 11 deletions
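The heart of the change: convert_mult_to_fma no longer digs the multiplication operands out of an assignment via gimple_assign_rhs1/rhs2 but takes them as arguments (and uses gimple_get_lhs, so the defining statement may be a call), which lets execute_optimize_widening_mul feed it not only a MULT_EXPR but also a pow (x, 2.0) call, passing the call's first argument twice. As an illustrative sketch (not part of the commit; function names are invented), both forms below express x*x + y and are the kind of source this patch lets the widening_mul pass contract into a fused multiply-add on an FMA-capable target, with options along the lines of the new testcase (-O2 -funsafe-math-optimizations -mfma4):

/* Illustrative only.  Both functions compute x*x + y; the first is the
   PR 46722 pattern, the second is the power-of-two form that is now
   routed to convert_mult_to_fma as well.  */

double
square_plus (double x, double y)
{
  return x * x + y;                /* MULT_EXPR feeding a PLUS_EXPR.  */
}

double
pow2_plus (double x, double y)
{
  /* pow (x, 2.0): treated as x*x by the pass, assuming the call
     survives earlier folding and reaches widening_mul intact.  */
  return __builtin_pow (x, 2.0) + y;
}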

gcc/ChangeLog

@@ -1,3 +1,11 @@
+2010-11-30  Richard Guenther  <rguenther@suse.de>
+
+        PR tree-optimization/46722
+        * tree-ssa-math-opts.c (convert_mult_to_fma): Get multiplication
+        operands as arguments.
+        (execute_optimize_widening_mul): Also handle power of two as
+        multiplication.
+
 2010-11-30  Dave Korn  <dave.korn.cygwin@gmail.com>
 
         PR middle-end/46709

gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
+2010-11-30  Richard Guenther  <rguenther@suse.de>
+
+        PR tree-optimization/46722
+        * gcc.target/i386/fma4-fma-2.c: New testcase.
+
 2010-11-29  Nicola Pero  <nicola.pero@meta-innovation.com>
 
         * objc.dg/duplicate-class-1.m: New.

gcc/testsuite/gcc.target/i386/fma4-fma-2.c (new file)

@@ -0,0 +1,67 @@
+/* Test that the compiler properly optimizes floating point multiply
+   and add instructions into vfmaddss, vfmsubss, vfnmaddss,
+   vfnmsubss on FMA4 systems.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -funsafe-math-optimizations -mfma4" } */
+
+extern void exit (int);
+
+float
+flt_mul_add (float a, float c)
+{
+  return (a * a) + c;
+}
+
+double
+dbl_mul_add (double a, double c)
+{
+  return (a * a) + c;
+}
+
+float
+flt_mul_sub (float a, float c)
+{
+  return (a * a) - c;
+}
+
+double
+dbl_mul_sub (double a, double c)
+{
+  return (a * a) - c;
+}
+
+float
+flt_neg_mul_add (float a, float c)
+{
+  return (-(a * a)) + c;
+}
+
+double
+dbl_neg_mul_add (double a, double c)
+{
+  return (-(a * a)) + c;
+}
+
+float f[10] = { 2, 3, 4 };
+double d[10] = { 2, 3, 4 };
+
+int main ()
+{
+  f[3] = flt_mul_add (f[0], f[2]);
+  f[4] = flt_mul_sub (f[0], f[2]);
+  f[5] = flt_neg_mul_add (f[0], f[2]);
+
+  d[3] = dbl_mul_add (d[0], d[2]);
+  d[4] = dbl_mul_sub (d[0], d[2]);
+  d[5] = dbl_neg_mul_add (d[0], d[2]);
+
+  exit (0);
+}
+/* { dg-final { scan-assembler "vfmaddss" } } */
+/* { dg-final { scan-assembler "vfmaddsd" } } */
+/* { dg-final { scan-assembler "vfmsubss" } } */
+/* { dg-final { scan-assembler "vfmsubsd" } } */
+/* { dg-final { scan-assembler "vfnmaddss" } } */
+/* { dg-final { scan-assembler "vfnmaddsd" } } */
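As an aside (illustrative, not from the commit): vfmaddss/vfmaddsd and the other mnemonics scanned for above are FMA4 fused multiply-add instructions, computing a*b + c with a single rounding, which is the semantics of the FMA_EXPR the pass emits. The same operation can be written out explicitly with the fma builtin; this is roughly what the contraction enabled here by -funsafe-math-optimizations produces for the functions in this test:

/* Hand-written equivalent of the fused form (illustrative only):
   one multiply-add, one rounding step.  On an FMA4 target this can
   map to vfmaddsd.  */
double
fused_square_add (double x, double y)
{
  return __builtin_fma (x, x, y);
}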

gcc/tree-ssa-math-opts.c

@@ -1494,14 +1494,14 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   return true;
 }
 
-/* Combine the multiplication at MUL_STMT with uses in additions and
-   subtractions to form fused multiply-add operations.  Returns true
-   if successful and MUL_STMT should be removed.  */
+/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
+   with uses in additions and subtractions to form fused multiply-add
+   operations.  Returns true if successful and MUL_STMT should be removed.  */
 
 static bool
-convert_mult_to_fma (gimple mul_stmt)
+convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
 {
-  tree mul_result = gimple_assign_lhs (mul_stmt);
+  tree mul_result = gimple_get_lhs (mul_stmt);
   tree type = TREE_TYPE (mul_result);
   gimple use_stmt, neguse_stmt, fma_stmt;
   use_operand_p use_p;
@@ -1607,7 +1607,7 @@ convert_mult_to_fma (gimple mul_stmt)
     {
       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
       enum tree_code use_code;
-      tree addop, mulop1, result = mul_result;
+      tree addop, mulop1 = op1, result = mul_result;
       bool negate_p = false;
 
       if (is_gimple_debug (use_stmt))
@@ -1646,7 +1646,6 @@ convert_mult_to_fma (gimple mul_stmt)
           negate_p = !negate_p;
         }
 
-      mulop1 = gimple_assign_rhs1 (mul_stmt);
       if (negate_p)
         mulop1 = force_gimple_operand_gsi (&gsi,
                                            build1 (NEGATE_EXPR,
@@ -1656,8 +1655,7 @@ convert_mult_to_fma (gimple mul_stmt)
 
       fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
                                                 gimple_assign_lhs (use_stmt),
-                                                mulop1,
-                                                gimple_assign_rhs2 (mul_stmt),
+                                                mulop1, op2,
                                                 addop);
       gsi_replace (&gsi, fma_stmt, true);
     }
@@ -1673,6 +1671,7 @@ static unsigned int
 execute_optimize_widening_mul (void)
 {
   basic_block bb;
+  bool cfg_changed = false;
 
   FOR_EACH_BB (bb)
     {
@@ -1690,7 +1689,9 @@ execute_optimize_widening_mul (void)
                 {
                 case MULT_EXPR:
                   if (!convert_mult_to_widen (stmt)
-                      && convert_mult_to_fma (stmt))
+                      && convert_mult_to_fma (stmt,
+                                              gimple_assign_rhs1 (stmt),
+                                              gimple_assign_rhs2 (stmt)))
                     {
                       gsi_remove (&gsi, true);
                       release_defs (stmt);
@@ -1706,11 +1707,42 @@ execute_optimize_widening_mul (void)
                 default:;
                 }
             }
+          else if (is_gimple_call (stmt))
+            {
+              tree fndecl = gimple_call_fndecl (stmt);
+              if (fndecl
+                  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+                {
+                  switch (DECL_FUNCTION_CODE (fndecl))
+                    {
+                    case BUILT_IN_POWF:
+                    case BUILT_IN_POW:
+                    case BUILT_IN_POWL:
+                      if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
+                          && REAL_VALUES_EQUAL
+                               (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+                                dconst2)
+                          && convert_mult_to_fma (stmt,
+                                                  gimple_call_arg (stmt, 0),
+                                                  gimple_call_arg (stmt, 0)))
+                        {
+                          gsi_remove (&gsi, true);
+                          release_defs (stmt);
+                          if (gimple_purge_dead_eh_edges (bb))
+                            cfg_changed = true;
+                          continue;
+                        }
+                      break;
+
+                    default:;
+                    }
+                }
+            }
           gsi_next (&gsi);
         }
     }
 
-  return 0;
+  return cfg_changed ? TODO_cleanup_cfg : 0;
 }
 
 static bool