re PR rtl-optimization/44214 (Compiler does not optimize vector divide with -freciprocal-math (or -ffast-math))
gcc: 2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> PR rtl-optimization/44214 * fold-const.c (exact_inverse): New function. (fold_binary_loc): Fold vector and complex division by constant into multiply by reciprocal with flag_reciprocal_math; fold vector division by constant into multiply by reciprocal with exact inverse. gcc/testsuite: 2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> PR rtl-optimization/44214 * gcc.dg/pr44214-1.c: New test. * gcc.dg/pr44214-2.c: Likewise. * gcc.dg/pr44214-3.c: Likewise. From-SVN: r186625
This commit is contained in:
parent
ead84f73b0
commit
add6207a86
|
@ -1,3 +1,11 @@
|
|||
2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
PR rtl-optimization/44214
|
||||
* fold-const.c (exact_inverse): New function.
|
||||
(fold_binary_loc): Fold vector and complex division by constant into
|
||||
multiply by reciprocal with flag_reciprocal_math; fold vector division
|
||||
by constant into multiply by reciprocal with exact inverse.
|
||||
|
||||
2012-04-20 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* lto-symtab.c (lto_cgraph_replace_node): Merge needed instead of force flags.
|
||||
|
|
|
@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc, tree type,
|
|||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* If the real or vector real constant CST of type TYPE has an exact
|
||||
inverse, return it, else return NULL. */
|
||||
|
||||
static tree
|
||||
exact_inverse (tree type, tree cst)
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
tree unit_type, *elts;
|
||||
enum machine_mode mode;
|
||||
unsigned vec_nelts, i;
|
||||
|
||||
switch (TREE_CODE (cst))
|
||||
{
|
||||
case REAL_CST:
|
||||
r = TREE_REAL_CST (cst);
|
||||
|
||||
if (exact_real_inverse (TYPE_MODE (type), &r))
|
||||
return build_real (type, r);
|
||||
|
||||
return NULL_TREE;
|
||||
|
||||
case VECTOR_CST:
|
||||
vec_nelts = VECTOR_CST_NELTS (cst);
|
||||
elts = XALLOCAVEC (tree, vec_nelts);
|
||||
unit_type = TREE_TYPE (type);
|
||||
mode = TYPE_MODE (unit_type);
|
||||
|
||||
for (i = 0; i < vec_nelts; i++)
|
||||
{
|
||||
r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
|
||||
if (!exact_real_inverse (mode, &r))
|
||||
return NULL_TREE;
|
||||
elts[i] = build_real (unit_type, r);
|
||||
}
|
||||
|
||||
return build_vector (type, elts);
|
||||
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fold a binary expression of code CODE and type TYPE with operands
|
||||
OP0 and OP1. LOC is the location of the resulting expression.
|
||||
Return the folded expression if folding is successful. Otherwise,
|
||||
|
@ -11734,23 +11776,24 @@ fold_binary_loc (location_t loc,
|
|||
so only do this if -freciprocal-math. We can actually
|
||||
always safely do it if ARG1 is a power of two, but it's hard to
|
||||
tell if it is or not in a portable manner. */
|
||||
if (TREE_CODE (arg1) == REAL_CST)
|
||||
if (optimize
|
||||
&& (TREE_CODE (arg1) == REAL_CST
|
||||
|| (TREE_CODE (arg1) == COMPLEX_CST
|
||||
&& COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
|
||||
|| (TREE_CODE (arg1) == VECTOR_CST
|
||||
&& VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1)))))
|
||||
{
|
||||
if (flag_reciprocal_math
|
||||
&& 0 != (tem = const_binop (code, build_real (type, dconst1),
|
||||
arg1)))
|
||||
&& 0 != (tem = const_binop (code, build_one_cst (type), arg1)))
|
||||
return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
|
||||
/* Find the reciprocal if optimizing and the result is exact. */
|
||||
if (optimize)
|
||||
/* Find the reciprocal if optimizing and the result is exact.
|
||||
TODO: Complex reciprocal not implemented. */
|
||||
if (TREE_CODE (arg1) != COMPLEX_CST)
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
r = TREE_REAL_CST (arg1);
|
||||
if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
|
||||
{
|
||||
tem = build_real (type, r);
|
||||
return fold_build2_loc (loc, MULT_EXPR, type,
|
||||
fold_convert_loc (loc, type, arg0), tem);
|
||||
}
|
||||
tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
|
||||
|
||||
if (inverse)
|
||||
return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
|
||||
}
|
||||
}
|
||||
/* Convert A/B/C to A/(B*C). */
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
PR rtl-optimization/44214
|
||||
* gcc.dg/pr44214-1.c: New test.
|
||||
* gcc.dg/pr44214-2.c: Likewise.
|
||||
* gcc.dg/pr44214-3.c: Likewise.
|
||||
|
||||
2012-04-20 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* g++.dg/torture/20120420-1.C: New testcase.
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -freciprocal-math -fdump-tree-ccp1" } */
|
||||
|
||||
typedef double v2df __attribute__ ((vector_size (16)));
|
||||
|
||||
void do_div (v2df *a, v2df *b)
|
||||
{
|
||||
*a = *b / (v2df) { 2.0, 3.0 };
|
||||
}
|
||||
|
||||
/* Constant folding should multiply *b by the reciprocals of the
|
||||
vector elements. The fold does not take place for generic
|
||||
vectors until the first CCP pass. The string " * " occurs 3
|
||||
times: one multiply and two indirect parameters. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ccp1" } } */
|
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -freciprocal-math -fdump-tree-original" } */
|
||||
|
||||
void do_div (_Complex double *a, _Complex double *b)
|
||||
{
|
||||
*a = *b / (4.0 - 5.0fi);
|
||||
}
|
||||
|
||||
/* Constant folding should multiply *b by the reciprocal of 4 - 5i
|
||||
= 4/41 + (5/41)i. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " \\\* " 1 "original" } } */
|
||||
/* { dg-final { scan-tree-dump-times " / " 0 "original" } } */
|
||||
/* { dg-final { cleanup-tree-dump "original" } } */
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-ccp1" } */
|
||||
|
||||
typedef double v2df __attribute__ ((vector_size (16)));
|
||||
|
||||
void do_div (v2df *a, v2df *b)
|
||||
{
|
||||
*a = *b / (v2df) { 2.0, 2.0 };
|
||||
}
|
||||
|
||||
/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
|
||||
by the reciprocals of the vector elements. As a result there should be
|
||||
one vector multiply and zero divides in the optimized code. The fold
|
||||
does not take place for generic vectors until the first CCP pass. The
|
||||
string " * " occurs 3 times: one multiply and two indirect parameters. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ccp1" } } */
|
Loading…
Reference in New Issue