diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 449a1b49c25..a456ec1256a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2005-05-17 Paolo Bonzini + + * Makefile.in: Add tree-ssa-math-opts.c. + * expr.c (expand_expr_real_1) : Never emit as a*(1/b). + * fold-const.c (distribute_real_division): New. + (fold_binary) : Use it. + * tree-pass.h (pass_cse_reciprocals): New. + * tree-optimize.c (init_tree_optimization_passes): Run it. + * tree-ssa-math-opts.c: New file. + * doc/passes.texi: Document the new pass. + 2005-05-17 Richard Guenther PR middle-end/21595 @@ -21,7 +32,7 @@ 2005-05-17 Hans-Peter Nilsson * config/cris/cris.md: Unquote preparation and output statements. - (BWD, WD, BW): New, mode-macros. + (BWD, WD, BW): New, mode-macros. (S, s, m, mm, nbitsm1): New, mode-attrs. (szext, shift, shiftrt, ncond, ocond, rcond): New, code-macros. (u, su, shlr, slr, ncond, ocond, rcond, rCC, oCC, roCC): New, diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 8d0d15ca798..f9ed2e871b5 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -934,7 +934,7 @@ OBJS-common = \ tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o tree-ssa-loop.o \ tree-ssa-loop-niter.o tree-ssa-loop-manip.o tree-ssa-threadupdate.o \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ - tree-ssa-loop-ivcanon.o tree-ssa-propagate.o \ + tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-math-opts.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ @@ -1803,6 +1803,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \ hard-reg-set.h +tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \ + $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H) 
tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \ function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \ diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 8e123667ec5..ad6110e7312 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -354,7 +354,7 @@ This pass transforms tail recursion into a loop. It is located in This pass sinks stores and assignments down the flowgraph closer to it's use point. The pass is located in @file{tree-ssa-sink.c} and is -described by @code{pass_sink_code} +described by @code{pass_sink_code}. @item Partial redundancy elimination @@ -362,6 +362,12 @@ This pass eliminates partially redundant computations, as well as performing load motion. The pass is located in @file{tree-ssa-pre.c} and is described by @code{pass_pre}. +Just before partial redundancy elimination, if +@option{-funsafe-math-optimizations} is on, GCC tries to convert +divisions to multiplications by the reciprocal. The pass is located +in @file{tree-ssa-math-opts.c} and is described by +@code{pass_cse_reciprocals}. + @item Loop optimization The main driver of the pass is placed in @file{tree-ssa-loop.c} diff --git a/gcc/expr.c b/gcc/expr.c index 12fa129fb85..459c248f420 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -7806,18 +7806,6 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, return expand_divmod (0, code, mode, op0, op1, target, unsignedp); case RDIV_EXPR: - /* Emit a/b as a*(1/b). Later we may manage CSE the reciprocal saving - expensive divide. If not, combine will rebuild the original - computation. 
*/ - if (flag_unsafe_math_optimizations && optimize && !optimize_size - && TREE_CODE (type) == REAL_TYPE - && !real_onep (TREE_OPERAND (exp, 0))) - return expand_expr (build2 (MULT_EXPR, type, TREE_OPERAND (exp, 0), - build2 (RDIV_EXPR, type, - build_real (type, dconst1), - TREE_OPERAND (exp, 1))), - target, tmode, modifier); - goto binop; case TRUNC_MOD_EXPR: diff --git a/gcc/fold-const.c b/gcc/fold-const.c index deb8780b8d5..050d45c6069 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -3103,6 +3103,46 @@ distribute_bit_expr (enum tree_code code, tree type, tree arg0, tree arg1) return fold_build2 (TREE_CODE (arg0), type, common, fold_build2 (code, type, left, right)); } + +/* Knowing that ARG0 and ARG1 are each either an RDIV_EXPR or a MULT_EXPR, + simplify a binary operation with code CODE. This optimization is unsafe. */ +static tree +distribute_real_division (enum tree_code code, tree type, tree arg0, tree arg1) +{ + bool mul0 = TREE_CODE (arg0) == MULT_EXPR; + bool mul1 = TREE_CODE (arg1) == MULT_EXPR; + + /* (A / C) +- (B / C) -> (A +- B) / C. */ + if (mul0 == mul1 + && operand_equal_p (TREE_OPERAND (arg0, 1), + TREE_OPERAND (arg1, 1), 0)) + return fold_build2 (mul0 ? MULT_EXPR : RDIV_EXPR, type, + fold_build2 (code, type, + TREE_OPERAND (arg0, 0), + TREE_OPERAND (arg1, 0)), + TREE_OPERAND (arg0, 1)); + + /* (A / C1) +- (A / C2) -> A * (1 / C1 +- 1 / C2). 
*/ + if (operand_equal_p (TREE_OPERAND (arg0, 0), + TREE_OPERAND (arg1, 0), 0) + && TREE_CODE (TREE_OPERAND (arg0, 1)) == REAL_CST + && TREE_CODE (TREE_OPERAND (arg1, 1)) == REAL_CST) + { + REAL_VALUE_TYPE r0, r1; + r0 = TREE_REAL_CST (TREE_OPERAND (arg0, 1)); + r1 = TREE_REAL_CST (TREE_OPERAND (arg1, 1)); + if (!mul0) + real_arithmetic (&r0, RDIV_EXPR, &dconst1, &r0); + if (!mul1) + real_arithmetic (&r1, RDIV_EXPR, &dconst1, &r1); + real_arithmetic (&r0, code, &r0, &r1); + return fold_build2 (MULT_EXPR, type, + TREE_OPERAND (arg0, 0), + build_real (type, r0)); + } + + return NULL_TREE; +} /* Return a BIT_FIELD_REF of type TYPE to refer to BITSIZE bits of INNER starting at BITPOS. The field is unsigned if UNSIGNEDP is nonzero. */ @@ -7528,6 +7568,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) fold_convert (type, tem)); } + if (flag_unsafe_math_optimizations + && (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR) + && (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR) + && (tem = distribute_real_division (code, type, arg0, arg1))) + return tem; + /* Convert x+x into x*2.0. */ if (operand_equal_p (arg0, arg1, 0) && SCALAR_FLOAT_TYPE_P (type)) @@ -7925,6 +7971,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) return fold_convert (type, fold (tem)); } + if (flag_unsafe_math_optimizations + && (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR) + && (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR) + && (tem = distribute_real_division (code, type, arg0, arg1))) + return tem; + if (TREE_CODE (arg0) == MULT_EXPR && TREE_CODE (arg1) == MULT_EXPR && (!FLOAT_TYPE_P (type) || flag_unsafe_math_optimizations)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b6ce026b197..6fcfe370eeb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2005-05-17 Paolo Bonzini + + * gcc.dg/fold-div-1.c, gcc.dg/tree-ssa/recip-1.c, gcc.dg/tree-ssa/recip-2.c: New. 
+ 2005-05-17 Richard Guenther PR middle-end/21595 @@ -319,8 +323,8 @@ 2005-05-02 Paolo Bonzini - * gcc.dg/altivec-3.c (vec_store): Do not use the old - __builtin_altivec_st_internal_4si built-in. + * gcc.dg/altivec-3.c (vec_store): Do not use the old + __builtin_altivec_st_internal_4si built-in. 2005-05-02 Mark Mitchell diff --git a/gcc/testsuite/gcc.dg/fold-div-1.c b/gcc/testsuite/gcc.dg/fold-div-1.c new file mode 100644 index 00000000000..533908c300c --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-div-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-funsafe-math-optimizations -fdump-tree-gimple" } */ + +float f(float x) +{ + return x/2 + x/3; +} + +float g(float x) +{ + return 2/x + 3/x; +} + +float h(float x) +{ + return x/2 - x/3; +} + +float i(float x) +{ + return 2/x - 3/x; +} + +/* f and h should be turned into multiplications, + the divisions in g and i should be grouped together. */ + +/* { dg-final { scan-tree-dump-times " \\* " 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times " / " 2 "gimple" } } */ +/* { dg-final { cleanup-tree-dump "gimple" } } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-1.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-1.c new file mode 100644 index 00000000000..36f2c23ded7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */ + +float e(float *x, float *y, float *z) +{ + float m = __builtin_sqrt (*x * *x + *y * *y + *z * *z); + *x /= m; + *y /= m; + *z /= m; +} + +/* Look for only one division. 
*/ +/* { dg-final { scan-tree-dump-times "= .* /" 1 "recip" } } */ +/* { dg-final { cleanup-tree-dump "recip" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c new file mode 100644 index 00000000000..7d0e97ae371 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */ + +float e(float a, float b, float c, float d, float e, float f) +{ + if (a < b) + { + a = a + b; + c = c + d; + } + + /* The PHI nodes for these divisions should be combined. */ + e = e / a; + f = f / a; + + a = a / c; + b = b / c; + + return a + b + e + f; +} + +/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */ +/* { dg-final { cleanup-tree-dump "recip" } } */ diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c index 66c25de147b..ec873fbb250 100644 --- a/gcc/tree-optimize.c +++ b/gcc/tree-optimize.c @@ -383,6 +383,7 @@ init_tree_optimization_passes (void) we add may_alias right after fold builtins which can create arbitrary GIMPLE. */ NEXT_PASS (pass_may_alias); + NEXT_PASS (pass_cse_reciprocals); NEXT_PASS (pass_split_crit_edges); NEXT_PASS (pass_pre); NEXT_PASS (pass_sink_code); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 83fa184b736..0806822a9c4 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -196,6 +196,7 @@ extern struct tree_opt_pass pass_fold_builtins; extern struct tree_opt_pass pass_stdarg; extern struct tree_opt_pass pass_early_warn_uninitialized; extern struct tree_opt_pass pass_late_warn_uninitialized; +extern struct tree_opt_pass pass_cse_reciprocals; extern struct tree_opt_pass pass_warn_function_return; extern struct tree_opt_pass pass_warn_function_noreturn; extern struct tree_opt_pass pass_phiopt;