Extend tree-call-cdce to calls whose result is used

For -fmath-errno, builtins.c currently expands calls to sqrt to:

        y = sqrt_optab (x);
        if (y != y)
          [ sqrt (x); or errno = EDOM; ]

The drawbacks of this are:

- the call to sqrt is protected by the result of the optab rather
      than the input.  It would be better to check __builtin_isless (x, 0),
      like tree-call-cdce.c does.

- the branch isn't exposed at the gimple level and so gets little
      high-level optimisation.

- we do this for log too, but for log a zero input produces
      -inf rather than a NaN, and sets errno to ERANGE rather than EDOM.

This patch moves the code to tree-call-cdce.c instead, with the optab
operation being represented as an internal function.  This means that
we can use the existing argument-based range checks rather than the
result-based checks and that we get more gimple optimisation of
the branch.

Previously the pass was only enabled by default at -O2 or above,
but the old builtins.c code was enabled at -O.  The patch therefore
enables the pass at -O as well.

The previous patch to cfgexpand.c handled cases where functions
don't (or are assumed not to) set errno, so this patch makes
the builtins.c code dead.

Tested on x86_64-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabi
and visium-elf (for the EDOM stuff).

gcc/
	* builtins.c (expand_errno_check, expand_builtin_mathfn)
	(expand_builtin_mathfn_2): Delete.
	(expand_builtin): Remove handling of functions with
	internal function equivalents.
	* internal-fn.def (SET_EDOM): New internal function.
	* internal-fn.h (set_edom_supported_p): Declare.
	* internal-fn.c (expand_SET_EDOM): New function.
	(set_edom_supported_p): Likewise.
	* tree-call-cdce.c: Include builtins.h and internal-fn.h.
	Rewrite comment at head of file.
	(is_call_dce_candidate): Rename to...
	(can_test_argument_range): ...this.  Don't check gimple_call_lhs
	or gimple_call_builtin_p here.
	(edom_only_function): New function.
	(shrink_wrap_one_built_in_call_with_conds): New function, split out
	from...
	(shrink_wrap_one_built_in_call): ...here.
	(can_use_internal_fn, use_internal_fn): New functions.
	(shrink_wrap_conditional_dead_built_in_calls): Call use_internal_fn
	for calls that have an lhs.
	(pass_call_cdce::gate): Remove optimize_function_for_speed_p check.
	(pass_call_cdce::execute): Skip blocks that are optimized for size.
	Check gimple_call_builtin_p here.  Use can_use_internal_fn for
	calls with an lhs.
	* opts.c (default_options_table): Enable -ftree-builtin-call-dce
	at -O and above.

From-SVN: r230488
This commit is contained in:
Richard Sandiford 2015-11-17 18:51:55 +00:00 committed by Richard Sandiford
parent 4cfe7a6c35
commit 883cabdecd
7 changed files with 282 additions and 393 deletions

View File

@ -1,3 +1,32 @@
2015-11-17 Richard Sandiford <richard.sandiford@arm.com>
* builtins.c (expand_errno_check, expand_builtin_mathfn)
(expand_builtin_mathfn_2): Delete.
(expand_builtin): Remove handling of functions with
internal function equivalents.
* internal-fn.def (SET_EDOM): New internal function.
* internal-fn.h (set_edom_supported_p): Declare.
* internal-fn.c (expand_SET_EDOM): New function.
(set_edom_supported_p): Likewise.
* tree-call-cdce.c: Include builtins.h and internal-fn.h.
Rewrite comment at head of file.
(is_call_dce_candidate): Rename to...
(can_test_argument_range): ...this. Don't check gimple_call_lhs
or gimple_call_builtin_p here.
(edom_only_function): New function.
(shrink_wrap_one_built_in_call_with_conds): New function, split out
from...
(shrink_wrap_one_built_in_call): ...here.
(can_use_internal_fn, use_internal_fn): New functions.
(shrink_wrap_conditional_dead_built_in_calls): Call use_internal_fn
for calls that have an lhs.
(pass_call_cdce::gate): Remove optimize_function_for_speed_p check.
(pass_call_cdce::execute): Skip blocks that are optimized for size.
Check gimple_call_builtin_p here. Use can_use_internal_fn for
calls with an lhs.
* opts.c (default_options_table): Enable -ftree-builtin-call-dce
at -O and above.
2015-11-17 Richard Sandiford <richard.sandiford@arm.com>
* builtins.h (called_as_built_in): Declare.

View File

@ -101,9 +101,6 @@ static rtx expand_builtin_apply (rtx, rtx, rtx);
static void expand_builtin_return (rtx);
static enum type_class type_to_class (tree);
static rtx expand_builtin_classify_type (tree);
static void expand_errno_check (tree, rtx);
static rtx expand_builtin_mathfn (tree, rtx, rtx);
static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
static rtx expand_builtin_interclass_mathfn (tree, rtx);
@ -1972,286 +1969,6 @@ replacement_internal_fn (gcall *call)
return IFN_LAST;
}
/* If errno must be maintained, expand the RTL to check if the result,
TARGET, of a built-in function call, EXP, is NaN, and if so set
errno to EDOM.

EXP is the CALL_EXPR for the built-in and TARGET holds the value that
has already been computed for it. When TARGET_EDOM is defined and the
callee is marked TREE_NOTHROW, errno is stored to directly; otherwise
EXP is expanded as a real call so that the library routine can set
errno itself. */
static void
expand_errno_check (tree exp, rtx target)
{
/* LAB is emitted after the errno-setting code on both paths below, so
the comparison skips that code whenever it jumps. */
rtx_code_label *lab = gen_label_rtx ();
/* Test the result; if it is NaN, set errno=EDOM because
the argument was not in the domain. */
do_compare_rtx_and_jump (target, target, EQ, 0, GET_MODE (target),
NULL_RTX, NULL, lab,
/* The jump is very likely. */
REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1));
#ifdef TARGET_EDOM
/* If this built-in doesn't throw an exception, set errno directly. */
if (TREE_NOTHROW (TREE_OPERAND (CALL_EXPR_FN (exp), 0)))
{
/* Use the target's own errno expression when it provides one; otherwise
fall back to a word_mode memory reference to the symbol "errno". */
#ifdef GEN_ERRNO_RTX
rtx errno_rtx = GEN_ERRNO_RTX;
#else
rtx errno_rtx
= gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, "errno"));
#endif
emit_move_insn (errno_rtx,
gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx)));
emit_label (lab);
return;
}
#endif
/* Make sure the library call isn't expanded as a tail call. */
CALL_EXPR_TAILCALL (exp) = 0;
/* We can't set errno=EDOM directly; let the library call do it.
Pop the arguments right away in case the call gets deleted. */
NO_DEFER_POP;
expand_call (exp, target, 0);
OK_DEFER_POP;
emit_label (lab);
}
/* Expand a call to one of the builtin math functions (sqrt, exp, or log).
Return NULL_RTX if a normal call should be emitted rather than expanding
the function in-line. EXP is the expression that is a call to the builtin
function; if convenient, the result should be placed in TARGET.
SUBTARGET may be used as the target for computing one of EXP's operands.

The function first maps the built-in code to an optab. If the optab
can be used, it expands the operation into a fresh pseudo and, when
errno must be honored, follows it with expand_errno_check. If the
optab cannot be used, EXP is expanded as an ordinary call. */
static rtx
expand_builtin_mathfn (tree exp, rtx target, rtx subtarget)
{
optab builtin_optab;
rtx op0;
rtx_insn *insns;
tree fndecl = get_callee_fndecl (exp);
machine_mode mode;
/* ERRNO_SET records whether the in-line expansion must be followed by
an errno check. TRY_WIDENING lets the expansion be attempted even
when there is no direct handler for the result mode. */
bool errno_set = false;
bool try_widening = false;
tree arg;
if (!validate_arglist (exp, REAL_TYPE, VOID_TYPE))
return NULL_RTX;
arg = CALL_EXPR_ARG (exp, 0);
switch (DECL_FUNCTION_CODE (fndecl))
{
CASE_FLT_FN (BUILT_IN_SQRT):
/* No errno check is needed when the argument is known to be
nonnegative. */
errno_set = ! tree_expr_nonnegative_p (arg);
try_widening = true;
builtin_optab = sqrt_optab;
break;
CASE_FLT_FN (BUILT_IN_EXP):
errno_set = true; builtin_optab = exp_optab; break;
CASE_FLT_FN (BUILT_IN_EXP10):
CASE_FLT_FN (BUILT_IN_POW10):
errno_set = true; builtin_optab = exp10_optab; break;
CASE_FLT_FN (BUILT_IN_EXP2):
errno_set = true; builtin_optab = exp2_optab; break;
CASE_FLT_FN (BUILT_IN_EXPM1):
errno_set = true; builtin_optab = expm1_optab; break;
CASE_FLT_FN (BUILT_IN_LOGB):
errno_set = true; builtin_optab = logb_optab; break;
CASE_FLT_FN (BUILT_IN_LOG):
errno_set = true; builtin_optab = log_optab; break;
CASE_FLT_FN (BUILT_IN_LOG10):
errno_set = true; builtin_optab = log10_optab; break;
CASE_FLT_FN (BUILT_IN_LOG2):
errno_set = true; builtin_optab = log2_optab; break;
CASE_FLT_FN (BUILT_IN_LOG1P):
errno_set = true; builtin_optab = log1p_optab; break;
CASE_FLT_FN (BUILT_IN_ASIN):
builtin_optab = asin_optab; break;
CASE_FLT_FN (BUILT_IN_ACOS):
builtin_optab = acos_optab; break;
CASE_FLT_FN (BUILT_IN_TAN):
builtin_optab = tan_optab; break;
CASE_FLT_FN (BUILT_IN_ATAN):
builtin_optab = atan_optab; break;
CASE_FLT_FN (BUILT_IN_FLOOR):
builtin_optab = floor_optab; break;
CASE_FLT_FN (BUILT_IN_CEIL):
builtin_optab = ceil_optab; break;
CASE_FLT_FN (BUILT_IN_TRUNC):
builtin_optab = btrunc_optab; break;
CASE_FLT_FN (BUILT_IN_ROUND):
builtin_optab = round_optab; break;
CASE_FLT_FN (BUILT_IN_NEARBYINT):
builtin_optab = nearbyint_optab;
if (flag_trapping_math)
break;
/* Else fallthrough and expand as rint. */
CASE_FLT_FN (BUILT_IN_RINT):
builtin_optab = rint_optab; break;
CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
builtin_optab = significand_optab; break;
default:
gcc_unreachable ();
}
/* Make a suitable register to place result in. */
mode = TYPE_MODE (TREE_TYPE (exp));
/* The errno check tests for a NaN result, so it is dropped when errno
need not be maintained or when MODE does not honor NaNs. */
if (! flag_errno_math || ! HONOR_NANS (mode))
errno_set = false;
/* Before working hard, check whether the instruction is available, but try
to widen the mode for specific operations. */
if ((optab_handler (builtin_optab, mode) != CODE_FOR_nothing
|| (try_widening && !excess_precision_type (TREE_TYPE (exp))))
&& (!errno_set || !optimize_insn_for_size_p ()))
{
rtx result = gen_reg_rtx (mode);
/* Wrap the computation of the argument in a SAVE_EXPR, as we may
need to expand the argument again. This way, we will not perform
side-effects more than once. */
CALL_EXPR_ARG (exp, 0) = arg = builtin_save_expr (arg);
op0 = expand_expr (arg, subtarget, VOIDmode, EXPAND_NORMAL);
/* Collect the insns in a separate sequence so that they can be
discarded if the optab expansion fails. */
start_sequence ();
/* Compute into RESULT.
Set RESULT to wherever the result comes back. */
result = expand_unop (mode, builtin_optab, op0, result, 0);
if (result != 0)
{
if (errno_set)
expand_errno_check (exp, result);
/* Output the entire sequence. */
insns = get_insns ();
end_sequence ();
emit_insn (insns);
return result;
}
/* If we were unable to expand via the builtin, stop the sequence
(without outputting the insns) and call to the library function
with the stabilized argument list. */
end_sequence ();
}
return expand_call (exp, target, target == const0_rtx);
}
/* Expand a call to the builtin binary math functions (pow and atan2).
Return NULL_RTX if a normal call should be emitted rather than expanding the
function in-line. EXP is the expression that is a call to the builtin
function; if convenient, the result should be placed in TARGET.
SUBTARGET may be used as the target for computing one of EXP's
operands.

Besides pow and atan2, the switch below also handles scalb, scalbn,
scalbln, ldexp, fmod, remainder and drem. */
static rtx
expand_builtin_mathfn_2 (tree exp, rtx target, rtx subtarget)
{
optab builtin_optab;
rtx op0, op1, result;
rtx_insn *insns;
int op1_type = REAL_TYPE;
tree fndecl = get_callee_fndecl (exp);
tree arg0, arg1;
machine_mode mode;
bool errno_set = true;
/* The second argument is an integer for scalbn, scalbln and ldexp,
and a real for every other function handled here. */
switch (DECL_FUNCTION_CODE (fndecl))
{
CASE_FLT_FN (BUILT_IN_SCALBN):
CASE_FLT_FN (BUILT_IN_SCALBLN):
CASE_FLT_FN (BUILT_IN_LDEXP):
op1_type = INTEGER_TYPE;
/* Fall through; nothing more to do. */
default:
break;
}
if (!validate_arglist (exp, REAL_TYPE, op1_type, VOID_TYPE))
return NULL_RTX;
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
switch (DECL_FUNCTION_CODE (fndecl))
{
CASE_FLT_FN (BUILT_IN_POW):
builtin_optab = pow_optab; break;
CASE_FLT_FN (BUILT_IN_ATAN2):
builtin_optab = atan2_optab; break;
CASE_FLT_FN (BUILT_IN_SCALB):
/* Only expand in-line when the result format has radix 2. */
if (REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (exp)))->b != 2)
return 0;
builtin_optab = scalb_optab; break;
CASE_FLT_FN (BUILT_IN_SCALBN):
CASE_FLT_FN (BUILT_IN_SCALBLN):
/* These share ldexp_optab, but only when the format has radix 2. */
if (REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (exp)))->b != 2)
return 0;
/* Fall through... */
CASE_FLT_FN (BUILT_IN_LDEXP):
builtin_optab = ldexp_optab; break;
CASE_FLT_FN (BUILT_IN_FMOD):
builtin_optab = fmod_optab; break;
CASE_FLT_FN (BUILT_IN_REMAINDER):
CASE_FLT_FN (BUILT_IN_DREM):
builtin_optab = remainder_optab; break;
default:
gcc_unreachable ();
}
/* Make a suitable register to place result in. */
mode = TYPE_MODE (TREE_TYPE (exp));
/* Before working hard, check whether the instruction is available. */
if (optab_handler (builtin_optab, mode) == CODE_FOR_nothing)
return NULL_RTX;
result = gen_reg_rtx (mode);
/* The errno check tests for a NaN result, so it is dropped when errno
need not be maintained or when MODE does not honor NaNs. */
if (! flag_errno_math || ! HONOR_NANS (mode))
errno_set = false;
/* Leave the call alone when an errno check would be needed but the
insn is being optimized for size. */
if (errno_set && optimize_insn_for_size_p ())
return 0;
/* Always stabilize the argument list. */
CALL_EXPR_ARG (exp, 0) = arg0 = builtin_save_expr (arg0);
CALL_EXPR_ARG (exp, 1) = arg1 = builtin_save_expr (arg1);
op0 = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL);
op1 = expand_normal (arg1);
/* Collect the insns in a separate sequence so that they can be
discarded if the optab expansion fails. */
start_sequence ();
/* Compute into RESULT.
Set RESULT to wherever the result comes back. */
result = expand_binop (mode, builtin_optab, op0, op1,
result, 0, OPTAB_DIRECT);
/* If we were unable to expand via the builtin, stop the sequence
(without outputting the insns) and call to the library function
with the stabilized argument list. */
if (result == 0)
{
end_sequence ();
return expand_call (exp, target, target == const0_rtx);
}
if (errno_set)
expand_errno_check (exp, result);
/* Output the entire sequence. */
insns = get_insns ();
end_sequence ();
emit_insn (insns);
return result;
}
/* Expand a call to the builtin trinary math functions (fma).
Return NULL_RTX if a normal call should be emitted rather than expanding the
function in-line. EXP is the expression that is a call to the builtin
@ -5984,37 +5701,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
CASE_FLT_FN (BUILT_IN_CABS):
break;
CASE_FLT_FN (BUILT_IN_EXP):
CASE_FLT_FN (BUILT_IN_EXP10):
CASE_FLT_FN (BUILT_IN_POW10):
CASE_FLT_FN (BUILT_IN_EXP2):
CASE_FLT_FN (BUILT_IN_EXPM1):
CASE_FLT_FN (BUILT_IN_LOGB):
CASE_FLT_FN (BUILT_IN_LOG):
CASE_FLT_FN (BUILT_IN_LOG10):
CASE_FLT_FN (BUILT_IN_LOG2):
CASE_FLT_FN (BUILT_IN_LOG1P):
CASE_FLT_FN (BUILT_IN_TAN):
CASE_FLT_FN (BUILT_IN_ASIN):
CASE_FLT_FN (BUILT_IN_ACOS):
CASE_FLT_FN (BUILT_IN_ATAN):
CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
/* Treat these like sqrt only if unsafe math optimizations are allowed,
because of possible accuracy problems. */
if (! flag_unsafe_math_optimizations)
break;
CASE_FLT_FN (BUILT_IN_SQRT):
CASE_FLT_FN (BUILT_IN_FLOOR):
CASE_FLT_FN (BUILT_IN_CEIL):
CASE_FLT_FN (BUILT_IN_TRUNC):
CASE_FLT_FN (BUILT_IN_ROUND):
CASE_FLT_FN (BUILT_IN_NEARBYINT):
CASE_FLT_FN (BUILT_IN_RINT):
target = expand_builtin_mathfn (exp, target, subtarget);
if (target)
return target;
break;
CASE_FLT_FN (BUILT_IN_FMA):
target = expand_builtin_mathfn_ternary (exp, target, subtarget);
if (target)
@ -6061,23 +5747,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
return target;
break;
CASE_FLT_FN (BUILT_IN_ATAN2):
CASE_FLT_FN (BUILT_IN_LDEXP):
CASE_FLT_FN (BUILT_IN_SCALB):
CASE_FLT_FN (BUILT_IN_SCALBN):
CASE_FLT_FN (BUILT_IN_SCALBLN):
if (! flag_unsafe_math_optimizations)
break;
CASE_FLT_FN (BUILT_IN_FMOD):
CASE_FLT_FN (BUILT_IN_REMAINDER):
CASE_FLT_FN (BUILT_IN_DREM):
CASE_FLT_FN (BUILT_IN_POW):
target = expand_builtin_mathfn_2 (exp, target, subtarget);
if (target)
return target;
break;
CASE_FLT_FN (BUILT_IN_CEXPI):
target = expand_builtin_cexpi (exp, target);
gcc_assert (target);

View File

@ -2073,6 +2073,24 @@ expand_GOACC_REDUCTION (internal_fn, gcall *)
gcc_unreachable ();
}
/* Expand IFN_SET_EDOM: emit a store of TARGET_EDOM to errno.  The errno
   location is GEN_ERRNO_RTX when the target defines it, and otherwise a
   word_mode memory reference to the symbol "errno".  This must never be
   reached on targets that do not define TARGET_EDOM.  */

static void
expand_SET_EDOM (internal_fn, gcall *)
{
#ifndef TARGET_EDOM
  gcc_unreachable ();
#else
  rtx errno_rtx;
#ifdef GEN_ERRNO_RTX
  errno_rtx = GEN_ERRNO_RTX;
#else
  errno_rtx = gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, "errno"));
#endif
  rtx edom_value = gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx));
  emit_move_insn (errno_rtx, edom_value);
#endif
}
/* Expand a call to FN using the operands in STMT. FN has a single
output operand and NARGS input operands. */
@ -2217,6 +2235,18 @@ direct_internal_fn_supported_p (internal_fn fn, tree type)
return direct_internal_fn_supported_p (fn, tree_pair (type, type));
}
/* Return true if IFN_SET_EDOM is supported, which is the case exactly
   when the target defines TARGET_EDOM.  */

bool
set_edom_supported_p (void)
{
#ifndef TARGET_EDOM
  return false;
#else
  return true;
#endif
}
#define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
static void \
expand_##CODE (internal_fn fn, gcall *stmt) \

View File

@ -181,6 +181,10 @@ DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)
/* OpenACC reduction abstraction. See internal-fn.h for usage. */
DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL)
/* Set errno to EDOM, if GCC knows how to do that directly for the
current target. */
DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL)
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_OPTAB_FN

View File

@ -160,6 +160,7 @@ extern tree_pair direct_internal_fn_types (internal_fn, tree, tree *);
extern tree_pair direct_internal_fn_types (internal_fn, gcall *);
extern bool direct_internal_fn_supported_p (internal_fn, tree_pair);
extern bool direct_internal_fn_supported_p (internal_fn, tree);
extern bool set_edom_supported_p (void);
extern void expand_internal_call (gcall *);
extern void expand_internal_call (internal_fn, gcall *);

View File

@ -478,6 +478,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_invariants, NULL, 1 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_pta, NULL, 1 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fssa_phiopt, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_builtin_call_dce, NULL, 1 },
/* -O2 optimizations. */
{ OPT_LEVELS_2_PLUS, OPT_finline_small_functions, NULL, 1 },
@ -503,7 +504,6 @@ static const struct default_options default_options_table[] =
REORDER_BLOCKS_ALGORITHM_STC },
{ OPT_LEVELS_2_PLUS, OPT_freorder_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_builtin_call_dce, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_pre, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_switch_conversion, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fipa_cp, NULL, 1 },

View File

@ -33,46 +33,77 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
/* Conditional dead call elimination
/* This pass serves two closely-related purposes:
Some builtin functions can set errno on error conditions, but they
are otherwise pure. If the result of a call to such a function is
not used, the compiler can still not eliminate the call without
powerful interprocedural analysis to prove that the errno is not
checked. However, if the conditions under which the error occurs
are known, the compiler can conditionally dead code eliminate the
calls by shrink-wrapping the semi-dead calls into the error condition:
1. It conditionally executes calls that set errno if (a) the result of
the call is unused and (b) a simple range check on the arguments can
detect most cases where errno does not need to be set.
built_in_call (args)
==>
if (error_cond (args))
built_in_call (args)
This is the "conditional dead-code elimination" that gave the pass
its original name, since the call is dead for most argument values.
The calls for which it helps are usually part of the C++ abstraction
penalty exposed after inlining.
2. It looks for calls to built-in functions that set errno and whose
result is used. It checks whether there is an associated internal
function that doesn't set errno and whether the target supports
that internal function. If so, the pass uses the internal function
to compute the result of the built-in function but still arranges
for errno to be set when necessary. There are two ways of setting
errno:
a. by protecting the original call with the same argument checks as (1)
b. by protecting the original call with a check that the result
of the internal function is not equal to itself (i.e. is NaN).
(b) requires that NaNs are the only erroneous results. It is not
appropriate for functions like log, which sets errno to ERANGE for zero
arguments. (b) is also likely to perform worse than (a) because it
requires the result to be calculated first. The pass therefore uses
(a) when it can and uses (b) as a fallback.
For (b) the pass can replace the original call with a call to
IFN_SET_EDOM, if the target supports direct assignments to errno.
In both cases, arguments that require errno to be set should occur
rarely in practice. Checks of the errno result should also be rare,
but the compiler would need powerful interprocedural analysis to
prove that errno is not checked. It's much easier to add argument
checks or result checks instead.
An example of (1) is:
An actual simple example is :
log (x); // Mostly dead call
==>
if (__builtin_islessequal (x, 0))
log (x);
With this change, call to log (x) is effectively eliminated, as
in majority of the cases, log won't be called with x out of
in the majority of the cases, log won't be called with x out of
range. The branch is totally predictable, so the branch cost
is low.
An example of (2) is:
y = sqrt (x);
==>
y = IFN_SQRT (x);
if (__builtin_isless (x, 0))
sqrt (x);
In the vast majority of cases we should then never need to call sqrt.
Note that library functions are not supposed to clear errno to zero without
error. See IEEE Std 1003.1, section 2.3 Error Numbers, and section 7.5:3 of
ISO/IEC 9899 (C99).
The condition wrapping the builtin call is conservatively set to avoid too
aggressive (wrong) shrink wrapping. The optimization is called conditional
dead call elimination because the call is eliminated under the condition
that the input arguments would not lead to domain or range error (for
instance when x <= 0 for a log (x) call), however the chances that the error
condition is hit is very low (those builtin calls which are conditionally
dead are usually part of the C++ abstraction penalty exposed after
inlining). */
aggressive (wrong) shrink wrapping. */
/* A structure for representing input domain of
@ -251,28 +282,15 @@ check_builtin_call (gcall *bcall)
return check_target_format (arg);
}
/* A helper function to determine if a builtin function call is a
candidate for conditional DCE. Returns true if the builtin call
is a candidate. */
/* Return true if built-in function call CALL calls a math function
and if we know how to test the range of its arguments to detect _most_
situations in which errno is not set. The test must err on the side
of treating non-erroneous values as potentially erroneous. */
static bool
is_call_dce_candidate (gcall *call)
can_test_argument_range (gcall *call)
{
tree fn;
enum built_in_function fnc;
/* Only potentially dead calls are considered. */
if (gimple_call_lhs (call))
return false;
fn = gimple_call_fndecl (call);
if (!fn
|| !DECL_BUILT_IN (fn)
|| (DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL))
return false;
fnc = DECL_FUNCTION_CODE (fn);
switch (fnc)
switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
{
/* Trig functions. */
CASE_FLT_FN (BUILT_IN_ACOS):
@ -306,6 +324,31 @@ is_call_dce_candidate (gcall *call)
return false;
}
/* Return true if the only errno value that CALL can produce is EDOM
   (a domain error), i.e. it can never report a pole, range overflow or
   range underflow error (all ERANGE).  For such functions, testing
   whether the result is a NaN tells us whether errno would have been
   set.  CALL must be a call to a built-in function.  */

static bool
edom_only_function (gcall *call)
{
  bool edom_only = false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
    {
    CASE_FLT_FN (BUILT_IN_ACOS):
    CASE_FLT_FN (BUILT_IN_ASIN):
    CASE_FLT_FN (BUILT_IN_ATAN):
    CASE_FLT_FN (BUILT_IN_COS):
    CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
    CASE_FLT_FN (BUILT_IN_SIN):
    CASE_FLT_FN (BUILT_IN_SQRT):
    CASE_FLT_FN (BUILT_IN_FMOD):
    CASE_FLT_FN (BUILT_IN_REMAINDER):
      edom_only = true;
      break;

    default:
      break;
    }

  return edom_only;
}
/* A helper function to generate gimple statements for one bound
comparison, so that the built-in function is called whenever
@ -703,33 +746,24 @@ gen_shrink_wrap_conditions (gcall *bi_call, vec<gimple *> conds,
/* Probability of the branch (to the call) is taken. */
#define ERR_PROB 0.01
/* The function to shrink wrap a partially dead builtin call
whose return value is not used anywhere, but has to be kept
live due to potential error condition. Returns true if the
transformation actually happens. */
/* Shrink-wrap BI_CALL so that it is only called when one of the NCONDS
conditions in CONDS is false.
Return true on success, in which case the cfg will have been updated. */
static bool
shrink_wrap_one_built_in_call (gcall *bi_call)
shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
unsigned int nconds)
{
gimple_stmt_iterator bi_call_bsi;
basic_block bi_call_bb, join_tgt_bb, guard_bb;
edge join_tgt_in_edge_from_call, join_tgt_in_edge_fall_thru;
edge bi_call_in_edge0, guard_bb_in_edge;
unsigned tn_cond_stmts, nconds;
unsigned tn_cond_stmts;
unsigned ci;
gimple *cond_expr = NULL;
gimple *cond_expr_start;
auto_vec<gimple *, 12> conds;
gen_shrink_wrap_conditions (bi_call, conds, &nconds);
/* This can happen if the condition generator decides
it is not beneficial to do the transformation. Just
return false and do not do any transformation for
the call. */
if (nconds == 0)
return false;
/* The cfg we want to create looks like this:
[guard n-1] <- guard_bb (old block)
@ -868,6 +902,117 @@ shrink_wrap_one_built_in_call (gcall *bi_call)
return true;
}
/* Guard BI_CALL with the conditions produced by
   gen_shrink_wrap_conditions, so that it is only executed when it might
   set errno (and is always executed when it would set errno).

   Return true if the call was shrink-wrapped, in which case the cfg has
   been updated.  Return false, leaving everything untouched, if no
   conditions were generated.  */

static bool
shrink_wrap_one_built_in_call (gcall *bi_call)
{
  auto_vec<gimple *, 12> guards;
  unsigned n_guards = 0;

  gen_shrink_wrap_conditions (bi_call, guards, &n_guards);

  /* The condition generator yields no conditions when it decides that
     the transformation is not beneficial; do nothing in that case.  */
  return (n_guards != 0
	  && shrink_wrap_one_built_in_call_with_conds (bi_call, guards,
						       n_guards));
}
/* Return true if built-in function call CALL is a candidate for being
   split into an internal function that computes the result plus a
   separate, conditionally-executed call that sets errno.  */

static bool
can_use_internal_fn (gcall *call)
{
  /* A call without a virtual definition does not set errno, so there is
     nothing to gain from replacing it.  */
  if (!gimple_vdef (call))
    return false;

  /* We cannot conditionalize a call that ends its block unless the block
     has a fallthrough successor.  */
  if (stmt_ends_bb_p (call)
      && !find_fallthru_edge (gimple_bb (call)->succs))
    return false;

  /* The built-in needs an internal-function equivalent.  */
  if (replacement_internal_fn (call) == IFN_LAST)
    return false;

  /* Finally, we must be able to catch every case in which errno would be
     set while still avoiding the call in most cases: either through a
     range test on the arguments or through a NaN test on the result.  */
  return can_test_argument_range (call) || edom_only_function (call);
}
/* Implement built-in function call CALL using an internal function.

   A call to the replacement internal function, taking the same arguments
   as CALL, is inserted immediately before CALL and takes over CALL's lhs.
   CALL itself is kept (without an lhs) purely for its effect on errno and
   is shrink-wrapped so that it only runs when errno might need to be set:

   - if argument range conditions are available, they guard CALL;

   - otherwise CALL must be an EDOM-only function, and CALL is guarded by
     a test that the new result is not equal to itself (i.e. is NaN).  In
     that case CALL is replaced by IFN_SET_EDOM when the target supports
     it and CALL does not end its basic block.

   Return true on success, in which case the cfg will have changed.
   Return false, without changing anything, if neither kind of guard is
   available.  */

static bool
use_internal_fn (gcall *call)
{
  unsigned nconds = 0;
  auto_vec<gimple *, 12> conds;

  /* can_use_internal_fn also accepts calls that only satisfy
     edom_only_function.  Only ask for argument range conditions when
     CALL is one of the functions whose argument range we know how to
     test; the condition generator is not meant to be handed anything
     else.  */
  if (can_test_argument_range (call))
    gen_shrink_wrap_conditions (call, conds, &nconds);
  if (nconds == 0 && !edom_only_function (call))
    return false;

  internal_fn ifn = replacement_internal_fn (call);
  gcc_assert (ifn != IFN_LAST);

  /* Construct the new call, with the same arguments as the original one.  */
  auto_vec <tree, 16> args;
  unsigned int nargs = gimple_call_num_args (call);
  for (unsigned int i = 0; i < nargs; ++i)
    args.safe_push (gimple_call_arg (call, i));
  gcall *new_call = gimple_build_call_internal_vec (ifn, args);
  gimple_set_location (new_call, gimple_location (call));

  /* Transfer the LHS to the new call.  */
  tree lhs = gimple_call_lhs (call);
  gimple_call_set_lhs (new_call, lhs);
  gimple_call_set_lhs (call, NULL_TREE);
  SSA_NAME_DEF_STMT (lhs) = new_call;

  /* Insert the new call.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  gsi_insert_before (&gsi, new_call, GSI_SAME_STMT);

  if (nconds == 0)
    {
      /* Skip the call if LHS == LHS.  If we reach here, EDOM is the only
	 valid errno value and it is used iff the result is NaN.  */
      conds.quick_push (gimple_build_cond (EQ_EXPR, lhs, lhs,
					   NULL_TREE, NULL_TREE));
      nconds++;

      /* Try replacing the original call with a direct assignment to
	 errno, via an internal function.  */
      if (set_edom_supported_p () && !stmt_ends_bb_p (call))
	{
	  gimple_stmt_iterator call_gsi = gsi_for_stmt (call);
	  gcall *set_edom = gimple_build_call_internal (IFN_SET_EDOM, 0);
	  /* The replacement takes over the original call's virtual
	     operands.  */
	  gimple_set_vuse (set_edom, gimple_vuse (call));
	  gimple_set_vdef (set_edom, gimple_vdef (call));
	  SSA_NAME_DEF_STMT (gimple_vdef (set_edom)) = set_edom;
	  gimple_set_location (set_edom, gimple_location (call));
	  gsi_replace (&call_gsi, set_edom, false);
	  call = set_edom;
	}
    }

  if (!shrink_wrap_one_built_in_call_with_conds (call, conds, nconds))
    /* It's too late to back out now.  */
    gcc_unreachable ();
  return true;
}
/* The top level function for conditional dead code shrink
wrapping transformation. */
@ -884,7 +1029,10 @@ shrink_wrap_conditional_dead_built_in_calls (vec<gcall *> calls)
for (; i < n ; i++)
{
gcall *bi_call = calls[i];
changed |= shrink_wrap_one_built_in_call (bi_call);
if (gimple_call_lhs (bi_call))
changed |= use_internal_fn (bi_call);
else
changed |= shrink_wrap_one_built_in_call (bi_call);
}
return changed;
@ -913,13 +1061,12 @@ public:
{}
/* opt_pass methods: */
virtual bool gate (function *fun)
virtual bool gate (function *)
{
/* The limit constants used in the implementation
assume IEEE floating point format. Other formats
can be supported in the future if needed. */
return flag_tree_builtin_call_dce != 0
&& optimize_function_for_speed_p (fun);
return flag_tree_builtin_call_dce != 0;
}
virtual unsigned int execute (function *);
@ -935,11 +1082,20 @@ pass_call_cdce::execute (function *fun)
auto_vec<gcall *> cond_dead_built_in_calls;
FOR_EACH_BB_FN (bb, fun)
{
/* Skip blocks that are being optimized for size, since our
transformation always increases code size. */
if (optimize_bb_for_size_p (bb))
continue;
/* Collect dead call candidates. */
for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
{
gcall *stmt = dyn_cast <gcall *> (gsi_stmt (i));
if (stmt && is_call_dce_candidate (stmt))
if (stmt
&& gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
&& (gimple_call_lhs (stmt)
? can_use_internal_fn (stmt)
: can_test_argument_range (stmt)))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{