Extend tree-call-cdce to calls whose result is used
For -fmath-errno, builtins.c currently expands calls to sqrt to: y = sqrt_optab (x); if (y != y) [ sqrt (x); or errno = EDOM; ] The drawbacks of this are: - the call to sqrt is protected by the result of the optab rather than the input. It would be better to check __builtin_isless (x, 0), like tree-call-cdce.c does. - the branch isn't exposed at the gimple level and so gets little high-level optimisation. - we do this for log too, but for log a zero input produces -inf rather than a NaN, and sets errno to ERANGE rather than EDOM. This patch moves the code to tree-call-cdce.c instead, with the optab operation being represented as an internal function. This means that we can use the existing argument-based range checks rather than the result-based checks and that we get more gimple optimisation of the branch. Previously the pass was only enabled by default at -O2 or above, but the old builtins.c code was enabled at -O. The patch therefore enables the pass at -O as well. The previous patch to cfgexpand.c handled cases where functions don't (or are assumed not to) set errno, so this patch makes the builtins.c code dead. Tested on x86_64-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabi and visium-elf (for the EDOM stuff). gcc/ * builtins.c (expand_errno_check, expand_builtin_mathfn) (expand_builtin_mathfn_2): Delete. (expand_builtin): Remove handling of functions with internal function equivalents. * internal-fn.def (SET_EDOM): New internal function. * internal-fn.h (set_edom_supported_p): Declare. * internal-fn.c (expand_SET_EDOM): New function. (set_edom_supported_p): Likewise. * tree-call-cdce.c: Include builtins.h and internal-fn.h. Rewrite comment at head of file. (is_call_dce_candidate): Rename to... (can_test_argument_range): ...this. Don't check gimple_call_lhs or gimple_call_builtin_p here. (edom_only_function): New function. (shrink_wrap_one_built_in_call_with_conds): New function, split out from... (shrink_wrap_one_built_in_call): ...here. (can_use_internal_fn, use_internal_fn): New functions. (shrink_wrap_conditional_dead_built_in_calls): Call use_internal_fn for calls that have an lhs. (pass_call_cdce::gate): Remove optimize_function_for_speed_p check. (pass_call_cdce::execute): Skip blocks that are optimized for size. Check gimple_call_builtin_p here. Use can_use_internal_fn for calls with an lhs. * opts.c (default_options_table): Enable -ftree-builtin-call-cdce at -O and above. From-SVN: r230488
This commit is contained in:
parent
4cfe7a6c35
commit
883cabdecd
|
@ -1,3 +1,32 @@
|
|||
2015-11-17 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* builtins.c (expand_errno_check, expand_builtin_mathfn)
|
||||
(expand_builtin_mathfn_2): Delete.
|
||||
(expand_builtin): Remove handling of functions with
|
||||
internal function equivalents.
|
||||
* internal-fn.def (SET_EDOM): New internal function.
|
||||
* internal-fn.h (set_edom_supported_p): Declare.
|
||||
* internal-fn.c (expand_SET_EDOM): New function.
|
||||
(set_edom_supported_p): Likewise.
|
||||
* tree-call-cdce.c: Include builtins.h and internal-fn.h.
|
||||
Rewrite comment at head of file.
|
||||
(is_call_dce_candidate): Rename to...
|
||||
(can_test_argument_range): ...this. Don't check gimple_call_lhs
|
||||
or gimple_call_builtin_p here.
|
||||
(edom_only_function): New function.
|
||||
(shrink_wrap_one_built_in_call_with_conds): New function, split out
|
||||
from...
|
||||
(shrink_wrap_one_built_in_call): ...here.
|
||||
(can_use_internal_fn, use_internal_fn): New functions.
|
||||
(shrink_wrap_conditional_dead_built_in_calls): Call use_internal_fn
|
||||
for calls that have an lhs.
|
||||
(pass_call_cdce::gate): Remove optimize_function_for_speed_p check.
|
||||
(pass_call_cdce::execute): Skip blocks that are optimized for size.
|
||||
Check gimple_call_builtin_p here. Use can_use_internal_fn for
|
||||
calls with an lhs.
|
||||
* opts.c (default_options_table): Enable -ftree-builtin-call-cdce
|
||||
at -O and above.
|
||||
|
||||
2015-11-17 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* builtins.h (called_as_built_in): Declare.
|
||||
|
|
331
gcc/builtins.c
331
gcc/builtins.c
|
@ -101,9 +101,6 @@ static rtx expand_builtin_apply (rtx, rtx, rtx);
|
|||
static void expand_builtin_return (rtx);
|
||||
static enum type_class type_to_class (tree);
|
||||
static rtx expand_builtin_classify_type (tree);
|
||||
static void expand_errno_check (tree, rtx);
|
||||
static rtx expand_builtin_mathfn (tree, rtx, rtx);
|
||||
static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
|
||||
static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
|
||||
static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
|
||||
static rtx expand_builtin_interclass_mathfn (tree, rtx);
|
||||
|
@ -1972,286 +1969,6 @@ replacement_internal_fn (gcall *call)
|
|||
return IFN_LAST;
|
||||
}
|
||||
|
||||
/* If errno must be maintained, expand the RTL to check if the result,
|
||||
TARGET, of a built-in function call, EXP, is NaN, and if so set
|
||||
errno to EDOM. */
|
||||
|
||||
static void
|
||||
expand_errno_check (tree exp, rtx target)
|
||||
{
|
||||
rtx_code_label *lab = gen_label_rtx ();
|
||||
|
||||
/* Test the result; if it is NaN, set errno=EDOM because
|
||||
the argument was not in the domain. */
|
||||
do_compare_rtx_and_jump (target, target, EQ, 0, GET_MODE (target),
|
||||
NULL_RTX, NULL, lab,
|
||||
/* The jump is very likely. */
|
||||
REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1));
|
||||
|
||||
#ifdef TARGET_EDOM
|
||||
/* If this built-in doesn't throw an exception, set errno directly. */
|
||||
if (TREE_NOTHROW (TREE_OPERAND (CALL_EXPR_FN (exp), 0)))
|
||||
{
|
||||
#ifdef GEN_ERRNO_RTX
|
||||
rtx errno_rtx = GEN_ERRNO_RTX;
|
||||
#else
|
||||
rtx errno_rtx
|
||||
= gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, "errno"));
|
||||
#endif
|
||||
emit_move_insn (errno_rtx,
|
||||
gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx)));
|
||||
emit_label (lab);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Make sure the library call isn't expanded as a tail call. */
|
||||
CALL_EXPR_TAILCALL (exp) = 0;
|
||||
|
||||
/* We can't set errno=EDOM directly; let the library call do it.
|
||||
Pop the arguments right away in case the call gets deleted. */
|
||||
NO_DEFER_POP;
|
||||
expand_call (exp, target, 0);
|
||||
OK_DEFER_POP;
|
||||
emit_label (lab);
|
||||
}
|
||||
|
||||
/* Expand a call to one of the builtin math functions (sqrt, exp, or log).
|
||||
Return NULL_RTX if a normal call should be emitted rather than expanding
|
||||
the function in-line. EXP is the expression that is a call to the builtin
|
||||
function; if convenient, the result should be placed in TARGET.
|
||||
SUBTARGET may be used as the target for computing one of EXP's operands. */
|
||||
|
||||
static rtx
|
||||
expand_builtin_mathfn (tree exp, rtx target, rtx subtarget)
|
||||
{
|
||||
optab builtin_optab;
|
||||
rtx op0;
|
||||
rtx_insn *insns;
|
||||
tree fndecl = get_callee_fndecl (exp);
|
||||
machine_mode mode;
|
||||
bool errno_set = false;
|
||||
bool try_widening = false;
|
||||
tree arg;
|
||||
|
||||
if (!validate_arglist (exp, REAL_TYPE, VOID_TYPE))
|
||||
return NULL_RTX;
|
||||
|
||||
arg = CALL_EXPR_ARG (exp, 0);
|
||||
|
||||
switch (DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
CASE_FLT_FN (BUILT_IN_SQRT):
|
||||
errno_set = ! tree_expr_nonnegative_p (arg);
|
||||
try_widening = true;
|
||||
builtin_optab = sqrt_optab;
|
||||
break;
|
||||
CASE_FLT_FN (BUILT_IN_EXP):
|
||||
errno_set = true; builtin_optab = exp_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_EXP10):
|
||||
CASE_FLT_FN (BUILT_IN_POW10):
|
||||
errno_set = true; builtin_optab = exp10_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_EXP2):
|
||||
errno_set = true; builtin_optab = exp2_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_EXPM1):
|
||||
errno_set = true; builtin_optab = expm1_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_LOGB):
|
||||
errno_set = true; builtin_optab = logb_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_LOG):
|
||||
errno_set = true; builtin_optab = log_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_LOG10):
|
||||
errno_set = true; builtin_optab = log10_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_LOG2):
|
||||
errno_set = true; builtin_optab = log2_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_LOG1P):
|
||||
errno_set = true; builtin_optab = log1p_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_ASIN):
|
||||
builtin_optab = asin_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_ACOS):
|
||||
builtin_optab = acos_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_TAN):
|
||||
builtin_optab = tan_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_ATAN):
|
||||
builtin_optab = atan_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_FLOOR):
|
||||
builtin_optab = floor_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_CEIL):
|
||||
builtin_optab = ceil_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_TRUNC):
|
||||
builtin_optab = btrunc_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_ROUND):
|
||||
builtin_optab = round_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_NEARBYINT):
|
||||
builtin_optab = nearbyint_optab;
|
||||
if (flag_trapping_math)
|
||||
break;
|
||||
/* Else fallthrough and expand as rint. */
|
||||
CASE_FLT_FN (BUILT_IN_RINT):
|
||||
builtin_optab = rint_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
|
||||
builtin_optab = significand_optab; break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Make a suitable register to place result in. */
|
||||
mode = TYPE_MODE (TREE_TYPE (exp));
|
||||
|
||||
if (! flag_errno_math || ! HONOR_NANS (mode))
|
||||
errno_set = false;
|
||||
|
||||
/* Before working hard, check whether the instruction is available, but try
|
||||
to widen the mode for specific operations. */
|
||||
if ((optab_handler (builtin_optab, mode) != CODE_FOR_nothing
|
||||
|| (try_widening && !excess_precision_type (TREE_TYPE (exp))))
|
||||
&& (!errno_set || !optimize_insn_for_size_p ()))
|
||||
{
|
||||
rtx result = gen_reg_rtx (mode);
|
||||
|
||||
/* Wrap the computation of the argument in a SAVE_EXPR, as we may
|
||||
need to expand the argument again. This way, we will not perform
|
||||
side-effects more the once. */
|
||||
CALL_EXPR_ARG (exp, 0) = arg = builtin_save_expr (arg);
|
||||
|
||||
op0 = expand_expr (arg, subtarget, VOIDmode, EXPAND_NORMAL);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
/* Compute into RESULT.
|
||||
Set RESULT to wherever the result comes back. */
|
||||
result = expand_unop (mode, builtin_optab, op0, result, 0);
|
||||
|
||||
if (result != 0)
|
||||
{
|
||||
if (errno_set)
|
||||
expand_errno_check (exp, result);
|
||||
|
||||
/* Output the entire sequence. */
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn (insns);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* If we were unable to expand via the builtin, stop the sequence
|
||||
(without outputting the insns) and call to the library function
|
||||
with the stabilized argument list. */
|
||||
end_sequence ();
|
||||
}
|
||||
|
||||
return expand_call (exp, target, target == const0_rtx);
|
||||
}
|
||||
|
||||
/* Expand a call to the builtin binary math functions (pow and atan2).
|
||||
Return NULL_RTX if a normal call should be emitted rather than expanding the
|
||||
function in-line. EXP is the expression that is a call to the builtin
|
||||
function; if convenient, the result should be placed in TARGET.
|
||||
SUBTARGET may be used as the target for computing one of EXP's
|
||||
operands. */
|
||||
|
||||
static rtx
|
||||
expand_builtin_mathfn_2 (tree exp, rtx target, rtx subtarget)
|
||||
{
|
||||
optab builtin_optab;
|
||||
rtx op0, op1, result;
|
||||
rtx_insn *insns;
|
||||
int op1_type = REAL_TYPE;
|
||||
tree fndecl = get_callee_fndecl (exp);
|
||||
tree arg0, arg1;
|
||||
machine_mode mode;
|
||||
bool errno_set = true;
|
||||
|
||||
switch (DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
CASE_FLT_FN (BUILT_IN_SCALBN):
|
||||
CASE_FLT_FN (BUILT_IN_SCALBLN):
|
||||
CASE_FLT_FN (BUILT_IN_LDEXP):
|
||||
op1_type = INTEGER_TYPE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!validate_arglist (exp, REAL_TYPE, op1_type, VOID_TYPE))
|
||||
return NULL_RTX;
|
||||
|
||||
arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
|
||||
switch (DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
CASE_FLT_FN (BUILT_IN_POW):
|
||||
builtin_optab = pow_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_ATAN2):
|
||||
builtin_optab = atan2_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_SCALB):
|
||||
if (REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (exp)))->b != 2)
|
||||
return 0;
|
||||
builtin_optab = scalb_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_SCALBN):
|
||||
CASE_FLT_FN (BUILT_IN_SCALBLN):
|
||||
if (REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (exp)))->b != 2)
|
||||
return 0;
|
||||
/* Fall through... */
|
||||
CASE_FLT_FN (BUILT_IN_LDEXP):
|
||||
builtin_optab = ldexp_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_FMOD):
|
||||
builtin_optab = fmod_optab; break;
|
||||
CASE_FLT_FN (BUILT_IN_REMAINDER):
|
||||
CASE_FLT_FN (BUILT_IN_DREM):
|
||||
builtin_optab = remainder_optab; break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Make a suitable register to place result in. */
|
||||
mode = TYPE_MODE (TREE_TYPE (exp));
|
||||
|
||||
/* Before working hard, check whether the instruction is available. */
|
||||
if (optab_handler (builtin_optab, mode) == CODE_FOR_nothing)
|
||||
return NULL_RTX;
|
||||
|
||||
result = gen_reg_rtx (mode);
|
||||
|
||||
if (! flag_errno_math || ! HONOR_NANS (mode))
|
||||
errno_set = false;
|
||||
|
||||
if (errno_set && optimize_insn_for_size_p ())
|
||||
return 0;
|
||||
|
||||
/* Always stabilize the argument list. */
|
||||
CALL_EXPR_ARG (exp, 0) = arg0 = builtin_save_expr (arg0);
|
||||
CALL_EXPR_ARG (exp, 1) = arg1 = builtin_save_expr (arg1);
|
||||
|
||||
op0 = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL);
|
||||
op1 = expand_normal (arg1);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
/* Compute into RESULT.
|
||||
Set RESULT to wherever the result comes back. */
|
||||
result = expand_binop (mode, builtin_optab, op0, op1,
|
||||
result, 0, OPTAB_DIRECT);
|
||||
|
||||
/* If we were unable to expand via the builtin, stop the sequence
|
||||
(without outputting the insns) and call to the library function
|
||||
with the stabilized argument list. */
|
||||
if (result == 0)
|
||||
{
|
||||
end_sequence ();
|
||||
return expand_call (exp, target, target == const0_rtx);
|
||||
}
|
||||
|
||||
if (errno_set)
|
||||
expand_errno_check (exp, result);
|
||||
|
||||
/* Output the entire sequence. */
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn (insns);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Expand a call to the builtin trinary math functions (fma).
|
||||
Return NULL_RTX if a normal call should be emitted rather than expanding the
|
||||
function in-line. EXP is the expression that is a call to the builtin
|
||||
|
@ -5984,37 +5701,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
|||
CASE_FLT_FN (BUILT_IN_CABS):
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_EXP):
|
||||
CASE_FLT_FN (BUILT_IN_EXP10):
|
||||
CASE_FLT_FN (BUILT_IN_POW10):
|
||||
CASE_FLT_FN (BUILT_IN_EXP2):
|
||||
CASE_FLT_FN (BUILT_IN_EXPM1):
|
||||
CASE_FLT_FN (BUILT_IN_LOGB):
|
||||
CASE_FLT_FN (BUILT_IN_LOG):
|
||||
CASE_FLT_FN (BUILT_IN_LOG10):
|
||||
CASE_FLT_FN (BUILT_IN_LOG2):
|
||||
CASE_FLT_FN (BUILT_IN_LOG1P):
|
||||
CASE_FLT_FN (BUILT_IN_TAN):
|
||||
CASE_FLT_FN (BUILT_IN_ASIN):
|
||||
CASE_FLT_FN (BUILT_IN_ACOS):
|
||||
CASE_FLT_FN (BUILT_IN_ATAN):
|
||||
CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
|
||||
/* Treat these like sqrt only if unsafe math optimizations are allowed,
|
||||
because of possible accuracy problems. */
|
||||
if (! flag_unsafe_math_optimizations)
|
||||
break;
|
||||
CASE_FLT_FN (BUILT_IN_SQRT):
|
||||
CASE_FLT_FN (BUILT_IN_FLOOR):
|
||||
CASE_FLT_FN (BUILT_IN_CEIL):
|
||||
CASE_FLT_FN (BUILT_IN_TRUNC):
|
||||
CASE_FLT_FN (BUILT_IN_ROUND):
|
||||
CASE_FLT_FN (BUILT_IN_NEARBYINT):
|
||||
CASE_FLT_FN (BUILT_IN_RINT):
|
||||
target = expand_builtin_mathfn (exp, target, subtarget);
|
||||
if (target)
|
||||
return target;
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_FMA):
|
||||
target = expand_builtin_mathfn_ternary (exp, target, subtarget);
|
||||
if (target)
|
||||
|
@ -6061,23 +5747,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
|||
return target;
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_ATAN2):
|
||||
CASE_FLT_FN (BUILT_IN_LDEXP):
|
||||
CASE_FLT_FN (BUILT_IN_SCALB):
|
||||
CASE_FLT_FN (BUILT_IN_SCALBN):
|
||||
CASE_FLT_FN (BUILT_IN_SCALBLN):
|
||||
if (! flag_unsafe_math_optimizations)
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_FMOD):
|
||||
CASE_FLT_FN (BUILT_IN_REMAINDER):
|
||||
CASE_FLT_FN (BUILT_IN_DREM):
|
||||
CASE_FLT_FN (BUILT_IN_POW):
|
||||
target = expand_builtin_mathfn_2 (exp, target, subtarget);
|
||||
if (target)
|
||||
return target;
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_CEXPI):
|
||||
target = expand_builtin_cexpi (exp, target);
|
||||
gcc_assert (target);
|
||||
|
|
|
@ -2073,6 +2073,24 @@ expand_GOACC_REDUCTION (internal_fn, gcall *)
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Set errno to EDOM. */
|
||||
|
||||
static void
|
||||
expand_SET_EDOM (internal_fn, gcall *)
|
||||
{
|
||||
#ifdef TARGET_EDOM
|
||||
#ifdef GEN_ERRNO_RTX
|
||||
rtx errno_rtx = GEN_ERRNO_RTX;
|
||||
#else
|
||||
rtx errno_rtx = gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, "errno"));
|
||||
#endif
|
||||
emit_move_insn (errno_rtx,
|
||||
gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx)));
|
||||
#else
|
||||
gcc_unreachable ();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Expand a call to FN using the operands in STMT. FN has a single
|
||||
output operand and NARGS input operands. */
|
||||
|
||||
|
@ -2217,6 +2235,18 @@ direct_internal_fn_supported_p (internal_fn fn, tree type)
|
|||
return direct_internal_fn_supported_p (fn, tree_pair (type, type));
|
||||
}
|
||||
|
||||
/* Return true if IFN_SET_EDOM is supported. */
|
||||
|
||||
bool
|
||||
set_edom_supported_p (void)
|
||||
{
|
||||
#ifdef TARGET_EDOM
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
|
||||
static void \
|
||||
expand_##CODE (internal_fn fn, gcall *stmt) \
|
||||
|
|
|
@ -181,6 +181,10 @@ DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)
|
|||
/* OpenACC reduction abstraction. See internal-fn.h for usage. */
|
||||
DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL)
|
||||
|
||||
/* Set errno to EDOM, if GCC knows how to do that directly for the
|
||||
current target. */
|
||||
DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
|
||||
#undef DEF_INTERNAL_INT_FN
|
||||
#undef DEF_INTERNAL_FLT_FN
|
||||
#undef DEF_INTERNAL_OPTAB_FN
|
||||
|
|
|
@ -160,6 +160,7 @@ extern tree_pair direct_internal_fn_types (internal_fn, tree, tree *);
|
|||
extern tree_pair direct_internal_fn_types (internal_fn, gcall *);
|
||||
extern bool direct_internal_fn_supported_p (internal_fn, tree_pair);
|
||||
extern bool direct_internal_fn_supported_p (internal_fn, tree);
|
||||
extern bool set_edom_supported_p (void);
|
||||
|
||||
extern void expand_internal_call (gcall *);
|
||||
extern void expand_internal_call (internal_fn, gcall *);
|
||||
|
|
|
@ -478,6 +478,7 @@ static const struct default_options default_options_table[] =
|
|||
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_invariants, NULL, 1 },
|
||||
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_pta, NULL, 1 },
|
||||
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fssa_phiopt, NULL, 1 },
|
||||
{ OPT_LEVELS_1_PLUS, OPT_ftree_builtin_call_dce, NULL, 1 },
|
||||
|
||||
/* -O2 optimizations. */
|
||||
{ OPT_LEVELS_2_PLUS, OPT_finline_small_functions, NULL, 1 },
|
||||
|
@ -503,7 +504,6 @@ static const struct default_options default_options_table[] =
|
|||
REORDER_BLOCKS_ALGORITHM_STC },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_freorder_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_builtin_call_dce, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_pre, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_switch_conversion, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fipa_cp, NULL, 1 },
|
||||
|
|
|
@ -33,46 +33,77 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "gimple-iterator.h"
|
||||
#include "tree-cfg.h"
|
||||
#include "tree-into-ssa.h"
|
||||
#include "builtins.h"
|
||||
#include "internal-fn.h"
|
||||
|
||||
|
||||
/* Conditional dead call elimination
|
||||
/* This pass serves two closely-related purposes:
|
||||
|
||||
Some builtin functions can set errno on error conditions, but they
|
||||
are otherwise pure. If the result of a call to such a function is
|
||||
not used, the compiler can still not eliminate the call without
|
||||
powerful interprocedural analysis to prove that the errno is not
|
||||
checked. However, if the conditions under which the error occurs
|
||||
are known, the compiler can conditionally dead code eliminate the
|
||||
calls by shrink-wrapping the semi-dead calls into the error condition:
|
||||
1. It conditionally executes calls that set errno if (a) the result of
|
||||
the call is unused and (b) a simple range check on the arguments can
|
||||
detect most cases where errno does not need to be set.
|
||||
|
||||
built_in_call (args)
|
||||
==>
|
||||
if (error_cond (args))
|
||||
built_in_call (args)
|
||||
This is the "conditional dead-code elimination" that gave the pass
|
||||
its original name, since the call is dead for most argument values.
|
||||
The calls for which it helps are usually part of the C++ abstraction
|
||||
penalty exposed after inlining.
|
||||
|
||||
2. It looks for calls to built-in functions that set errno and whose
|
||||
result is used. It checks whether there is an associated internal
|
||||
function that doesn't set errno and whether the target supports
|
||||
that internal function. If so, the pass uses the internal function
|
||||
to compute the result of the built-in function but still arranges
|
||||
for errno to be set when necessary. There are two ways of setting
|
||||
errno:
|
||||
|
||||
a. by protecting the original call with the same argument checks as (1)
|
||||
|
||||
b. by protecting the original call with a check that the result
|
||||
of the internal function is not equal to itself (i.e. is NaN).
|
||||
|
||||
(b) requires that NaNs are the only erroneous results. It is not
|
||||
appropriate for functions like log, which returns ERANGE for zero
|
||||
arguments. (b) is also likely to perform worse than (a) because it
|
||||
requires the result to be calculated first. The pass therefore uses
|
||||
(a) when it can and uses (b) as a fallback.
|
||||
|
||||
For (b) the pass can replace the original call with a call to
|
||||
IFN_SET_EDOM, if the target supports direct assignments to errno.
|
||||
|
||||
In both cases, arguments that require errno to be set should occur
|
||||
rarely in practice. Checks of the errno result should also be rare,
|
||||
but the compiler would need powerful interprocedural analysis to
|
||||
prove that errno is not checked. It's much easier to add argument
|
||||
checks or result checks instead.
|
||||
|
||||
An example of (1) is:
|
||||
|
||||
An actual simple example is :
|
||||
log (x); // Mostly dead call
|
||||
==>
|
||||
if (__builtin_islessequal (x, 0))
|
||||
log (x);
|
||||
|
||||
With this change, call to log (x) is effectively eliminated, as
|
||||
in majority of the cases, log won't be called with x out of
|
||||
in the majority of the cases, log won't be called with x out of
|
||||
range. The branch is totally predictable, so the branch cost
|
||||
is low.
|
||||
|
||||
An example of (2) is:
|
||||
|
||||
y = sqrt (x);
|
||||
==>
|
||||
y = IFN_SQRT (x);
|
||||
if (__builtin_isless (x, 0))
|
||||
sqrt (x);
|
||||
|
||||
In the vast majority of cases we should then never need to call sqrt.
|
||||
|
||||
Note that library functions are not supposed to clear errno to zero without
|
||||
error. See IEEE Std 1003.1, section 2.3 Error Numbers, and section 7.5:3 of
|
||||
ISO/IEC 9899 (C99).
|
||||
|
||||
The condition wrapping the builtin call is conservatively set to avoid too
|
||||
aggressive (wrong) shrink wrapping. The optimization is called conditional
|
||||
dead call elimination because the call is eliminated under the condition
|
||||
that the input arguments would not lead to domain or range error (for
|
||||
instance when x <= 0 for a log (x) call), however the chances that the error
|
||||
condition is hit is very low (those builtin calls which are conditionally
|
||||
dead are usually part of the C++ abstraction penalty exposed after
|
||||
inlining). */
|
||||
aggressive (wrong) shrink wrapping. */
|
||||
|
||||
|
||||
/* A structure for representing input domain of
|
||||
|
@ -251,28 +282,15 @@ check_builtin_call (gcall *bcall)
|
|||
return check_target_format (arg);
|
||||
}
|
||||
|
||||
/* A helper function to determine if a builtin function call is a
|
||||
candidate for conditional DCE. Returns true if the builtin call
|
||||
is a candidate. */
|
||||
/* Return true if built-in function call CALL calls a math function
|
||||
and if we know how to test the range of its arguments to detect _most_
|
||||
situations in which errno is not set. The test must err on the side
|
||||
of treating non-erroneous values as potentially erroneous. */
|
||||
|
||||
static bool
|
||||
is_call_dce_candidate (gcall *call)
|
||||
can_test_argument_range (gcall *call)
|
||||
{
|
||||
tree fn;
|
||||
enum built_in_function fnc;
|
||||
|
||||
/* Only potentially dead calls are considered. */
|
||||
if (gimple_call_lhs (call))
|
||||
return false;
|
||||
|
||||
fn = gimple_call_fndecl (call);
|
||||
if (!fn
|
||||
|| !DECL_BUILT_IN (fn)
|
||||
|| (DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL))
|
||||
return false;
|
||||
|
||||
fnc = DECL_FUNCTION_CODE (fn);
|
||||
switch (fnc)
|
||||
switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
|
||||
{
|
||||
/* Trig functions. */
|
||||
CASE_FLT_FN (BUILT_IN_ACOS):
|
||||
|
@ -306,6 +324,31 @@ is_call_dce_candidate (gcall *call)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Return true if CALL can produce a domain error (EDOM) but can never
|
||||
produce a pole, range overflow or range underflow error (all ERANGE).
|
||||
This means that we can tell whether a function would have set errno
|
||||
by testing whether the result is a NaN. */
|
||||
|
||||
static bool
|
||||
edom_only_function (gcall *call)
|
||||
{
|
||||
switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
|
||||
{
|
||||
CASE_FLT_FN (BUILT_IN_ACOS):
|
||||
CASE_FLT_FN (BUILT_IN_ASIN):
|
||||
CASE_FLT_FN (BUILT_IN_ATAN):
|
||||
CASE_FLT_FN (BUILT_IN_COS):
|
||||
CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
|
||||
CASE_FLT_FN (BUILT_IN_SIN):
|
||||
CASE_FLT_FN (BUILT_IN_SQRT):
|
||||
CASE_FLT_FN (BUILT_IN_FMOD):
|
||||
CASE_FLT_FN (BUILT_IN_REMAINDER):
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* A helper function to generate gimple statements for one bound
|
||||
comparison, so that the built-in function is called whenever
|
||||
|
@ -703,33 +746,24 @@ gen_shrink_wrap_conditions (gcall *bi_call, vec<gimple *> conds,
|
|||
/* Probability of the branch (to the call) is taken. */
|
||||
#define ERR_PROB 0.01
|
||||
|
||||
/* The function to shrink wrap a partially dead builtin call
|
||||
whose return value is not used anywhere, but has to be kept
|
||||
live due to potential error condition. Returns true if the
|
||||
transformation actually happens. */
|
||||
/* Shrink-wrap BI_CALL so that it is only called when one of the NCONDS
|
||||
conditions in CONDS is false.
|
||||
|
||||
Return true on success, in which case the cfg will have been updated. */
|
||||
|
||||
static bool
|
||||
shrink_wrap_one_built_in_call (gcall *bi_call)
|
||||
shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
|
||||
unsigned int nconds)
|
||||
{
|
||||
gimple_stmt_iterator bi_call_bsi;
|
||||
basic_block bi_call_bb, join_tgt_bb, guard_bb;
|
||||
edge join_tgt_in_edge_from_call, join_tgt_in_edge_fall_thru;
|
||||
edge bi_call_in_edge0, guard_bb_in_edge;
|
||||
unsigned tn_cond_stmts, nconds;
|
||||
unsigned tn_cond_stmts;
|
||||
unsigned ci;
|
||||
gimple *cond_expr = NULL;
|
||||
gimple *cond_expr_start;
|
||||
|
||||
auto_vec<gimple *, 12> conds;
|
||||
gen_shrink_wrap_conditions (bi_call, conds, &nconds);
|
||||
|
||||
/* This can happen if the condition generator decides
|
||||
it is not beneficial to do the transformation. Just
|
||||
return false and do not do any transformation for
|
||||
the call. */
|
||||
if (nconds == 0)
|
||||
return false;
|
||||
|
||||
/* The cfg we want to create looks like this:
|
||||
|
||||
[guard n-1] <- guard_bb (old block)
|
||||
|
@ -868,6 +902,117 @@ shrink_wrap_one_built_in_call (gcall *bi_call)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Shrink-wrap BI_CALL so that it is only called when it might set errno
|
||||
(but is always called if it would set errno).
|
||||
|
||||
Return true on success, in which case the cfg will have been updated. */
|
||||
|
||||
static bool
|
||||
shrink_wrap_one_built_in_call (gcall *bi_call)
|
||||
{
|
||||
unsigned nconds = 0;
|
||||
auto_vec<gimple *, 12> conds;
|
||||
gen_shrink_wrap_conditions (bi_call, conds, &nconds);
|
||||
/* This can happen if the condition generator decides
|
||||
it is not beneficial to do the transformation. Just
|
||||
return false and do not do any transformation for
|
||||
the call. */
|
||||
if (nconds == 0)
|
||||
return false;
|
||||
return shrink_wrap_one_built_in_call_with_conds (bi_call, conds, nconds);
|
||||
}
|
||||
|
||||
/* Return true if built-in function call CALL could be implemented using
|
||||
a combination of an internal function to compute the result and a
|
||||
separate call to set errno. */
|
||||
|
||||
static bool
|
||||
can_use_internal_fn (gcall *call)
|
||||
{
|
||||
/* Only replace calls that set errno. */
|
||||
if (!gimple_vdef (call))
|
||||
return false;
|
||||
|
||||
/* Punt if we can't conditionalize the call. */
|
||||
basic_block bb = gimple_bb (call);
|
||||
if (stmt_ends_bb_p (call) && !find_fallthru_edge (bb->succs))
|
||||
return false;
|
||||
|
||||
/* See whether there is an internal function for this built-in. */
|
||||
if (replacement_internal_fn (call) == IFN_LAST)
|
||||
return false;
|
||||
|
||||
/* See whether we can catch all cases where errno would be set,
|
||||
while still avoiding the call in most cases. */
|
||||
if (!can_test_argument_range (call)
|
||||
&& !edom_only_function (call))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Implement built-in function call CALL using an internal function.
|
||||
Return true on success, in which case the cfg will have changed. */
|
||||
|
||||
static bool
|
||||
use_internal_fn (gcall *call)
|
||||
{
|
||||
unsigned nconds = 0;
|
||||
auto_vec<gimple *, 12> conds;
|
||||
gen_shrink_wrap_conditions (call, conds, &nconds);
|
||||
if (nconds == 0 && !edom_only_function (call))
|
||||
return false;
|
||||
|
||||
internal_fn ifn = replacement_internal_fn (call);
|
||||
gcc_assert (ifn != IFN_LAST);
|
||||
|
||||
/* Construct the new call, with the same arguments as the original one. */
|
||||
auto_vec <tree, 16> args;
|
||||
unsigned int nargs = gimple_call_num_args (call);
|
||||
for (unsigned int i = 0; i < nargs; ++i)
|
||||
args.safe_push (gimple_call_arg (call, i));
|
||||
gcall *new_call = gimple_build_call_internal_vec (ifn, args);
|
||||
gimple_set_location (new_call, gimple_location (call));
|
||||
|
||||
/* Transfer the LHS to the new call. */
|
||||
tree lhs = gimple_call_lhs (call);
|
||||
gimple_call_set_lhs (new_call, lhs);
|
||||
gimple_call_set_lhs (call, NULL_TREE);
|
||||
SSA_NAME_DEF_STMT (lhs) = new_call;
|
||||
|
||||
/* Insert the new call. */
|
||||
gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
||||
gsi_insert_before (&gsi, new_call, GSI_SAME_STMT);
|
||||
|
||||
if (nconds == 0)
|
||||
{
|
||||
/* Skip the call if LHS == LHS. If we reach here, EDOM is the only
|
||||
valid errno value and it is used iff the result is NaN. */
|
||||
conds.quick_push (gimple_build_cond (EQ_EXPR, lhs, lhs,
|
||||
NULL_TREE, NULL_TREE));
|
||||
nconds++;
|
||||
|
||||
/* Try replacing the original call with a direct assignment to
|
||||
errno, via an internal function. */
|
||||
if (set_edom_supported_p () && !stmt_ends_bb_p (call))
|
||||
{
|
||||
gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
||||
gcall *new_call = gimple_build_call_internal (IFN_SET_EDOM, 0);
|
||||
gimple_set_vuse (new_call, gimple_vuse (call));
|
||||
gimple_set_vdef (new_call, gimple_vdef (call));
|
||||
SSA_NAME_DEF_STMT (gimple_vdef (new_call)) = new_call;
|
||||
gimple_set_location (new_call, gimple_location (call));
|
||||
gsi_replace (&gsi, new_call, false);
|
||||
call = new_call;
|
||||
}
|
||||
}
|
||||
|
||||
if (!shrink_wrap_one_built_in_call_with_conds (call, conds, nconds))
|
||||
/* It's too late to back out now. */
|
||||
gcc_unreachable ();
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The top level function for conditional dead code shrink
|
||||
wrapping transformation. */
|
||||
|
||||
|
@ -884,7 +1029,10 @@ shrink_wrap_conditional_dead_built_in_calls (vec<gcall *> calls)
|
|||
for (; i < n ; i++)
|
||||
{
|
||||
gcall *bi_call = calls[i];
|
||||
changed |= shrink_wrap_one_built_in_call (bi_call);
|
||||
if (gimple_call_lhs (bi_call))
|
||||
changed |= use_internal_fn (bi_call);
|
||||
else
|
||||
changed |= shrink_wrap_one_built_in_call (bi_call);
|
||||
}
|
||||
|
||||
return changed;
|
||||
|
@ -913,13 +1061,12 @@ public:
|
|||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *fun)
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
/* The limit constants used in the implementation
|
||||
assume IEEE floating point format. Other formats
|
||||
can be supported in the future if needed. */
|
||||
return flag_tree_builtin_call_dce != 0
|
||||
&& optimize_function_for_speed_p (fun);
|
||||
return flag_tree_builtin_call_dce != 0;
|
||||
}
|
||||
|
||||
virtual unsigned int execute (function *);
|
||||
|
@ -935,11 +1082,20 @@ pass_call_cdce::execute (function *fun)
|
|||
auto_vec<gcall *> cond_dead_built_in_calls;
|
||||
FOR_EACH_BB_FN (bb, fun)
|
||||
{
|
||||
/* Skip blocks that are being optimized for size, since our
|
||||
transformation always increases code size. */
|
||||
if (optimize_bb_for_size_p (bb))
|
||||
continue;
|
||||
|
||||
/* Collect dead call candidates. */
|
||||
for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
|
||||
{
|
||||
gcall *stmt = dyn_cast <gcall *> (gsi_stmt (i));
|
||||
if (stmt && is_call_dce_candidate (stmt))
|
||||
if (stmt
|
||||
&& gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
|
||||
&& (gimple_call_lhs (stmt)
|
||||
? can_use_internal_fn (stmt)
|
||||
: can_test_argument_range (stmt)))
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue