PR 68432: Add a target hook to control size/speed optab choices
The problem in the PR is that some i386 optabs FAIL when optimising for size rather than speed. The gimple level generally needs access to this information before calling the generator, so this patch adds a new hook to say whether an optab should be used when optimising for size or speed. It also has a "both" option for cases where we want code that is optimised for both size and speed. I've passed the optab to the target hook because I think in most cases that's more useful than the instruction code. We could pass both if there's a use for it though. At the moment the match-and-simplify code doesn't have direct access to the target block, so for now I've used "both" there. Tested on x86_64-linux-gnu and powerpc64-linux-gnu. gcc/ PR tree-optimization/68432 * coretypes.h (optimization_type): New enum. * doc/tm.texi.in (TARGET_OPTAB_SUPPORTED_P): New hook. * doc/tm.texi: Regenerate. * target.def (optab_supported_p): New hook. * targhooks.h (default_optab_supported_p): Declare. * targhooks.c (default_optab_supported_p): New function. * predict.h (function_optimization_type): Declare. (bb_optimization_type): Likewise. * predict.c (function_optimization_type): New function. (bb_optimization_type): Likewise. * optabs-query.h (convert_optab_handler): Define an overload that takes an optimization type. (direct_optab_handler): Likewise. * optabs-query.c (convert_optab_handler): Likewise. (direct_optab_handler): Likewise. * internal-fn.h (direct_internal_fn_supported_p): Take an optimization_type argument. * internal-fn.c (direct_optab_supported_p): Likewise. (multi_vector_optab_supported_p): Likewise. (direct_internal_fn_supported_p): Likewise. * builtins.c (replacement_internal_fn): Update call to direct_internal_fn_supported_p. * gimple-match-head.c (build_call_internal): Likewise. * tree-vect-patterns.c (vect_recog_pow_pattern): Likewise. * tree-vect-stmts.c (vectorizable_internal_function): Likewise. * tree.c (maybe_build_call_expr_loc): Likewise. 
* config/i386/i386.c (ix86_optab_supported_p): New function. (TARGET_OPTAB_SUPPORTED_P): Define. * config/i386/i386.md (asinxf2): Remove optimize_insn_for_size_p check. (asin<mode>2, acosxf2, acos<mode>2, log1pxf2, log1p<mode>2) (expNcorexf3, expxf2, exp<mode>2, exp10xf2, exp10<mode>2, exp2xf2) (exp2<mode>2, expm1xf2, expm1<mode>2, ldexpxf3, ldexp<mode>3) (scalbxf3, scalb<mode>3, rint<mode>2, round<mode>2) (<rounding_insn>xf2, <rounding_insn><mode>2): Likewise. gcc/testsuite/ * gcc.target/i386/pr68432-1.c: New test. * gcc.target/i386/pr68432-2.c: Likewise. * gcc.target/i386/pr68432-3.c: Likewise. From-SVN: r231161
This commit is contained in:
parent
886456e210
commit
d95ab70a3c
|
@ -1,3 +1,41 @@
|
|||
2015-12-02 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/68432
|
||||
* coretypes.h (optimization_type): New enum.
|
||||
* doc/tm.texi.in (TARGET_OPTAB_SUPPORTED_P): New hook.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* target.def (optab_supported_p): New hook.
|
||||
* targhooks.h (default_optab_supported_p): Declare.
|
||||
* targhooks.c (default_optab_supported_p): New function.
|
||||
* predict.h (function_optimization_type): Declare.
|
||||
(bb_optimization_type): Likewise.
|
||||
* predict.c (function_optimization_type): New function.
|
||||
(bb_optimization_type): Likewise.
|
||||
* optabs-query.h (convert_optab_handler): Define an overload
|
||||
that takes an optimization type.
|
||||
(direct_optab_handler): Likewise.
|
||||
* optabs-query.c (convert_optab_handler): Likewise.
|
||||
(direct_optab_handler): Likewise.
|
||||
* internal-fn.h (direct_internal_fn_supported_p): Take an
|
||||
optimization_type argument.
|
||||
* internal-fn.c (direct_optab_supported_p): Likewise.
|
||||
(multi_vector_optab_supported_p): Likewise.
|
||||
(direct_internal_fn_supported_p): Likewise.
|
||||
* builtins.c (replacement_internal_fn): Update call to
|
||||
direct_internal_fn_supported_p.
|
||||
* gimple-match-head.c (build_call_internal): Likewise.
|
||||
* tree-vect-patterns.c (vect_recog_pow_pattern): Likewise.
|
||||
* tree-vect-stmts.c (vectorizable_internal_function): Likewise.
|
||||
* tree.c (maybe_build_call_expr_loc): Likewise.
|
||||
* config/i386/i386.c (ix86_optab_supported_p): New function.
|
||||
(TARGET_OPTAB_SUPPORTED_P): Define.
|
||||
* config/i386/i386.md (asinxf2): Remove optimize_insn_for_size_p check.
|
||||
(asin<mode>2, acosxf2, acos<mode>2, log1pxf2, log1p<mode>2)
|
||||
(expNcorexf3, expxf2, exp<mode>2, exp10xf2, exp10<mode>2, exp2xf2)
|
||||
(exp2<mode>2, expm1xf2, expm1<mode>2, ldexpxf3, ldexp<mode>3)
|
||||
(scalbxf3, scalb<mode>3, rint<mode>2, round<mode>2)
|
||||
(<rounding_insn>xf2, <rounding_insn><mode>2): Likewise.
|
||||
|
||||
2015-12-02 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* Makefile.in (GENSUPPORT_H): New macro.
|
||||
|
|
|
@ -1962,7 +1962,8 @@ replacement_internal_fn (gcall *call)
|
|||
if (ifn != IFN_LAST)
|
||||
{
|
||||
tree_pair types = direct_internal_fn_types (ifn, call);
|
||||
if (direct_internal_fn_supported_p (ifn, types))
|
||||
optimization_type opt_type = bb_optimization_type (gimple_bb (call));
|
||||
if (direct_internal_fn_supported_p (ifn, types, opt_type))
|
||||
return ifn;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54100,6 +54100,49 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
|
||||
|
||||
static bool
|
||||
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
|
||||
optimization_type opt_type)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case asin_optab:
|
||||
case acos_optab:
|
||||
case log1p_optab:
|
||||
case exp_optab:
|
||||
case exp10_optab:
|
||||
case exp2_optab:
|
||||
case expm1_optab:
|
||||
case ldexp_optab:
|
||||
case scalb_optab:
|
||||
case round_optab:
|
||||
return opt_type == OPTIMIZE_FOR_SPEED;
|
||||
|
||||
case rint_optab:
|
||||
if (SSE_FLOAT_MODE_P (mode1)
|
||||
&& TARGET_SSE_MATH
|
||||
&& !flag_trapping_math
|
||||
&& !TARGET_ROUND)
|
||||
return opt_type == OPTIMIZE_FOR_SPEED;
|
||||
return true;
|
||||
|
||||
case floor_optab:
|
||||
case ceil_optab:
|
||||
case btrunc_optab:
|
||||
if (SSE_FLOAT_MODE_P (mode1)
|
||||
&& TARGET_SSE_MATH
|
||||
&& !flag_trapping_math
|
||||
&& TARGET_ROUND)
|
||||
return true;
|
||||
return opt_type == OPTIMIZE_FOR_SPEED;
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Address space support.
|
||||
|
||||
This is not "far pointers" in the 16-bit sense, but an easy way
|
||||
|
@ -54645,6 +54688,9 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
|
|||
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
|
||||
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
|
||||
|
||||
#undef TARGET_OPTAB_SUPPORTED_P
|
||||
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-i386.h"
|
||||
|
|
|
@ -14726,9 +14726,6 @@
|
|||
{
|
||||
int i;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
for (i = 2; i < 6; i++)
|
||||
operands[i] = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -14746,9 +14743,6 @@
|
|||
rtx op0 = gen_reg_rtx (XFmode);
|
||||
rtx op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
|
||||
emit_insn (gen_asinxf2 (op0, op1));
|
||||
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
|
||||
|
@ -14770,9 +14764,6 @@
|
|||
{
|
||||
int i;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
for (i = 2; i < 6; i++)
|
||||
operands[i] = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -14790,9 +14781,6 @@
|
|||
rtx op0 = gen_reg_rtx (XFmode);
|
||||
rtx op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
|
||||
emit_insn (gen_acosxf2 (op0, op1));
|
||||
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
|
||||
|
@ -14953,9 +14941,6 @@
|
|||
"TARGET_USE_FANCY_MATH_387
|
||||
&& flag_unsafe_math_optimizations"
|
||||
{
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
ix86_emit_i387_log1p (operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
@ -14970,9 +14955,6 @@
|
|||
{
|
||||
rtx op0;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
|
||||
operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
|
||||
|
@ -15121,9 +15103,6 @@
|
|||
{
|
||||
int i;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
for (i = 3; i < 10; i++)
|
||||
operands[i] = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15138,9 +15117,6 @@
|
|||
{
|
||||
rtx op2;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op2 = gen_reg_rtx (XFmode);
|
||||
emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
|
||||
|
||||
|
@ -15158,9 +15134,6 @@
|
|||
{
|
||||
rtx op0, op1;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15178,9 +15151,6 @@
|
|||
{
|
||||
rtx op2;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op2 = gen_reg_rtx (XFmode);
|
||||
emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
|
||||
|
||||
|
@ -15198,9 +15168,6 @@
|
|||
{
|
||||
rtx op0, op1;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15218,9 +15185,6 @@
|
|||
{
|
||||
rtx op2;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op2 = gen_reg_rtx (XFmode);
|
||||
emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
|
||||
|
||||
|
@ -15238,9 +15202,6 @@
|
|||
{
|
||||
rtx op0, op1;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15278,9 +15239,6 @@
|
|||
{
|
||||
int i;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
for (i = 2; i < 13; i++)
|
||||
operands[i] = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15300,9 +15258,6 @@
|
|||
{
|
||||
rtx op0, op1;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15320,8 +15275,6 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
{
|
||||
rtx tmp1, tmp2;
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
tmp1 = gen_reg_rtx (XFmode);
|
||||
tmp2 = gen_reg_rtx (XFmode);
|
||||
|
@ -15343,9 +15296,6 @@
|
|||
{
|
||||
rtx op0, op1;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
|
||||
|
@ -15366,9 +15316,6 @@
|
|||
"TARGET_USE_FANCY_MATH_387
|
||||
&& flag_unsafe_math_optimizations"
|
||||
{
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
operands[3] = gen_reg_rtx (XFmode);
|
||||
})
|
||||
|
||||
|
@ -15383,9 +15330,6 @@
|
|||
{
|
||||
rtx op0, op1, op2;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
op2 = gen_reg_rtx (XFmode);
|
||||
|
@ -15463,8 +15407,6 @@
|
|||
if (TARGET_ROUND)
|
||||
emit_insn (gen_sse4_1_round<mode>2
|
||||
(operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
|
||||
else if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
else
|
||||
ix86_expand_rint (operands[0], operands[1]);
|
||||
}
|
||||
|
@ -15491,9 +15433,6 @@
|
|||
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math && !flag_rounding_math)"
|
||||
{
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math && !flag_rounding_math)
|
||||
{
|
||||
|
@ -15747,8 +15686,7 @@
|
|||
FRNDINT_ROUNDING))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_USE_FANCY_MATH_387
|
||||
&& flag_unsafe_math_optimizations
|
||||
&& !optimize_insn_for_size_p ()")
|
||||
&& flag_unsafe_math_optimizations")
|
||||
|
||||
(define_expand "<rounding_insn><mode>2"
|
||||
[(parallel [(set (match_operand:MODEF 0 "register_operand")
|
||||
|
@ -15768,8 +15706,6 @@
|
|||
if (TARGET_ROUND)
|
||||
emit_insn (gen_sse4_1_round<mode>2
|
||||
(operands[0], operands[1], GEN_INT (ROUND_<ROUNDING>)));
|
||||
else if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
else if (TARGET_64BIT || (<MODE>mode != DFmode))
|
||||
{
|
||||
if (ROUND_<ROUNDING> == ROUND_FLOOR)
|
||||
|
@ -15797,9 +15733,6 @@
|
|||
{
|
||||
rtx op0, op1;
|
||||
|
||||
if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
|
||||
op0 = gen_reg_rtx (XFmode);
|
||||
op1 = gen_reg_rtx (XFmode);
|
||||
emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
|
||||
|
|
|
@ -200,6 +200,18 @@ enum node_frequency {
|
|||
NODE_FREQUENCY_HOT
|
||||
};
|
||||
|
||||
/* The trade-off between code size and speed that an optimization
   decision should aim for.  */
enum optimization_type {
  /* Speed takes priority over size.  */
  OPTIMIZE_FOR_SPEED,

  /* Restrict choices to those that benefit size and speed alike.  */
  OPTIMIZE_FOR_BOTH,

  /* Size takes priority over speed.  */
  OPTIMIZE_FOR_SIZE
};
|
||||
|
||||
/* Possible initialization status of a variable. When requested
|
||||
by the user, this information is tracked and recorded in the DWARF
|
||||
debug information, along with the variable's location. */
|
||||
|
|
|
@ -6425,6 +6425,20 @@ Define this macro if a non-short-circuit operation produced by
|
|||
@code{BRANCH_COST} is greater than or equal to the value 2.
|
||||
@end defmac
|
||||
|
||||
@deftypefn {Target Hook} bool TARGET_OPTAB_SUPPORTED_P (int @var{op}, machine_mode @var{mode1}, machine_mode @var{mode2}, optimization_type @var{opt_type})
|
||||
Return true if the optimizers should use optab @var{op} with
|
||||
modes @var{mode1} and @var{mode2} for optimization type @var{opt_type}.
|
||||
The optab is known to have an associated @file{.md} instruction
|
||||
whose C condition is true. @var{mode2} is only meaningful for conversion
|
||||
optabs; for direct optabs it is a copy of @var{mode1}.
|
||||
|
||||
For example, when called with @var{op} equal to @code{rint_optab} and
|
||||
@var{mode1} equal to @code{DFmode}, the hook should say whether the
|
||||
optimizers should use optab @code{rintdf2}.
|
||||
|
||||
The default hook returns true for all inputs.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} bool TARGET_RTX_COSTS (rtx @var{x}, machine_mode @var{mode}, int @var{outer_code}, int @var{opno}, int *@var{total}, bool @var{speed})
|
||||
This target hook describes the relative costs of RTL expressions.
|
||||
|
||||
|
|
|
@ -4746,6 +4746,8 @@ Define this macro if a non-short-circuit operation produced by
|
|||
@code{BRANCH_COST} is greater than or equal to the value 2.
|
||||
@end defmac
|
||||
|
||||
@hook TARGET_OPTAB_SUPPORTED_P
|
||||
|
||||
@hook TARGET_RTX_COSTS
|
||||
|
||||
@hook TARGET_ADDRESS_COST
|
||||
|
|
|
@ -261,7 +261,7 @@ build_call_internal (internal_fn fn, tree type, unsigned int nargs, tree *ops)
|
|||
if (direct_internal_fn_p (fn))
|
||||
{
|
||||
tree_pair types = direct_internal_fn_types (fn, type, ops);
|
||||
if (!direct_internal_fn_supported_p (fn, types))
|
||||
if (!direct_internal_fn_supported_p (fn, types, OPTIMIZE_FOR_BOTH))
|
||||
return NULL;
|
||||
}
|
||||
return gimple_build_call_internal (fn, nargs, ops[0], ops[1], ops[2]);
|
||||
|
|
|
@ -2214,23 +2214,30 @@ direct_internal_fn_types (internal_fn fn, gcall *call)
|
|||
}
|
||||
|
||||
/* Return true if OPTAB is supported for TYPES (whose modes should be
|
||||
the same). Used for simple direct optabs. */
|
||||
the same) when the optimization type is OPT_TYPE. Used for simple
|
||||
direct optabs. */
|
||||
|
||||
static bool
|
||||
direct_optab_supported_p (direct_optab optab, tree_pair types)
|
||||
direct_optab_supported_p (direct_optab optab, tree_pair types,
|
||||
optimization_type opt_type)
|
||||
{
|
||||
machine_mode mode = TYPE_MODE (types.first);
|
||||
gcc_checking_assert (mode == TYPE_MODE (types.second));
|
||||
return direct_optab_handler (optab, mode) != CODE_FOR_nothing;
|
||||
return direct_optab_handler (optab, mode, opt_type) != CODE_FOR_nothing;
|
||||
}
|
||||
|
||||
/* Return true if load/store lanes optab OPTAB is supported for
|
||||
array type TYPES.first. */
|
||||
array type TYPES.first when the optimization type is OPT_TYPE. */
|
||||
|
||||
static bool
|
||||
multi_vector_optab_supported_p (convert_optab optab, tree_pair types)
|
||||
multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
|
||||
optimization_type opt_type)
|
||||
{
|
||||
return get_multi_vector_move (types.first, optab) != CODE_FOR_nothing;
|
||||
gcc_assert (TREE_CODE (types.first) == ARRAY_TYPE);
|
||||
machine_mode imode = TYPE_MODE (types.first);
|
||||
machine_mode vmode = TYPE_MODE (TREE_TYPE (types.first));
|
||||
return (convert_optab_handler (optab, imode, vmode, opt_type)
|
||||
!= CODE_FOR_nothing);
|
||||
}
|
||||
|
||||
#define direct_unary_optab_supported_p direct_optab_supported_p
|
||||
|
@ -2240,12 +2247,14 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types)
|
|||
#define direct_mask_store_optab_supported_p direct_optab_supported_p
|
||||
#define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
|
||||
|
||||
/* Return true if FN is supported for the types in TYPES. The types
|
||||
are those associated with the "type0" and "type1" fields of FN's
|
||||
direct_internal_fn_info structure. */
|
||||
/* Return true if FN is supported for the types in TYPES when the
|
||||
optimization type is OPT_TYPE. The types are those associated with
|
||||
the "type0" and "type1" fields of FN's direct_internal_fn_info
|
||||
structure. */
|
||||
|
||||
bool
|
||||
direct_internal_fn_supported_p (internal_fn fn, tree_pair types)
|
||||
direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
|
||||
optimization_type opt_type)
|
||||
{
|
||||
switch (fn)
|
||||
{
|
||||
|
@ -2253,7 +2262,8 @@ direct_internal_fn_supported_p (internal_fn fn, tree_pair types)
|
|||
case IFN_##CODE: break;
|
||||
#define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
|
||||
case IFN_##CODE: \
|
||||
return direct_##TYPE##_optab_supported_p (OPTAB##_optab, types);
|
||||
return direct_##TYPE##_optab_supported_p (OPTAB##_optab, types, \
|
||||
opt_type);
|
||||
#include "internal-fn.def"
|
||||
|
||||
case IFN_LAST:
|
||||
|
@ -2262,16 +2272,17 @@ direct_internal_fn_supported_p (internal_fn fn, tree_pair types)
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Return true if FN is supported for type TYPE. The caller knows that
|
||||
the "type0" and "type1" fields of FN's direct_internal_fn_info
|
||||
structure are the same. */
|
||||
/* Return true if FN is supported for type TYPE when the optimization
|
||||
type is OPT_TYPE. The caller knows that the "type0" and "type1"
|
||||
fields of FN's direct_internal_fn_info structure are the same. */
|
||||
|
||||
bool
|
||||
direct_internal_fn_supported_p (internal_fn fn, tree type)
|
||||
direct_internal_fn_supported_p (internal_fn fn, tree type,
|
||||
optimization_type opt_type)
|
||||
{
|
||||
const direct_internal_fn_info &info = direct_internal_fn (fn);
|
||||
gcc_checking_assert (info.type0 == info.type1);
|
||||
return direct_internal_fn_supported_p (fn, tree_pair (type, type));
|
||||
return direct_internal_fn_supported_p (fn, tree_pair (type, type), opt_type);
|
||||
}
|
||||
|
||||
/* Return true if IFN_SET_EDOM is supported. */
|
||||
|
|
|
@ -166,8 +166,10 @@ direct_internal_fn (internal_fn fn)
|
|||
|
||||
extern tree_pair direct_internal_fn_types (internal_fn, tree, tree *);
|
||||
extern tree_pair direct_internal_fn_types (internal_fn, gcall *);
|
||||
extern bool direct_internal_fn_supported_p (internal_fn, tree_pair);
|
||||
extern bool direct_internal_fn_supported_p (internal_fn, tree);
|
||||
extern bool direct_internal_fn_supported_p (internal_fn, tree_pair,
|
||||
optimization_type);
|
||||
extern bool direct_internal_fn_supported_p (internal_fn, tree,
|
||||
optimization_type);
|
||||
extern bool set_edom_supported_p (void);
|
||||
|
||||
extern void expand_internal_call (gcall *);
|
||||
|
|
|
@ -35,6 +35,36 @@ struct target_optabs *this_fn_optabs = &default_target_optabs;
|
|||
struct target_optabs *this_target_optabs = &default_target_optabs;
|
||||
#endif
|
||||
|
||||
/* Return the insn used to perform conversion OP from mode FROM_MODE
|
||||
to mode TO_MODE; return CODE_FOR_nothing if the target does not have
|
||||
such an insn, or if it is unsuitable for optimization type OPT_TYPE. */
|
||||
|
||||
insn_code
|
||||
convert_optab_handler (convert_optab optab, machine_mode to_mode,
|
||||
machine_mode from_mode, optimization_type opt_type)
|
||||
{
|
||||
insn_code icode = convert_optab_handler (optab, to_mode, from_mode);
|
||||
if (icode == CODE_FOR_nothing
|
||||
|| !targetm.optab_supported_p (optab, to_mode, from_mode, opt_type))
|
||||
return CODE_FOR_nothing;
|
||||
return icode;
|
||||
}
|
||||
|
||||
/* Return the insn used to implement mode MODE of OP; return
|
||||
CODE_FOR_nothing if the target does not have such an insn,
|
||||
or if it is unsuitable for optimization type OPT_TYPE. */
|
||||
|
||||
insn_code
|
||||
direct_optab_handler (convert_optab optab, machine_mode mode,
|
||||
optimization_type opt_type)
|
||||
{
|
||||
insn_code icode = direct_optab_handler (optab, mode);
|
||||
if (icode == CODE_FOR_nothing
|
||||
|| !targetm.optab_supported_p (optab, mode, mode, opt_type))
|
||||
return CODE_FOR_nothing;
|
||||
return icode;
|
||||
}
|
||||
|
||||
/* Enumerates the possible types of structure operand to an
|
||||
extraction_insn. */
|
||||
enum extraction_type { ET_unaligned_mem, ET_reg };
|
||||
|
|
|
@ -46,6 +46,9 @@ convert_optab_handler (convert_optab op, machine_mode to_mode,
|
|||
return raw_optab_handler (scode);
|
||||
}
|
||||
|
||||
enum insn_code convert_optab_handler (convert_optab, machine_mode,
|
||||
machine_mode, optimization_type);
|
||||
|
||||
/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
|
||||
if the target does not have such an insn. */
|
||||
|
||||
|
@ -55,6 +58,9 @@ direct_optab_handler (direct_optab op, machine_mode mode)
|
|||
return optab_handler (op, mode);
|
||||
}
|
||||
|
||||
enum insn_code direct_optab_handler (convert_optab, machine_mode,
|
||||
optimization_type);
|
||||
|
||||
/* Return true if UNOPTAB is for a trapping-on-overflow operation. */
|
||||
|
||||
inline bool
|
||||
|
|
|
@ -269,6 +269,16 @@ optimize_function_for_speed_p (struct function *fun)
|
|||
return !optimize_function_for_size_p (fun);
|
||||
}
|
||||
|
||||
/* Return the optimization type that should be used for the function FUN. */
|
||||
|
||||
optimization_type
|
||||
function_optimization_type (struct function *fun)
|
||||
{
|
||||
return (optimize_function_for_speed_p (fun)
|
||||
? OPTIMIZE_FOR_SPEED
|
||||
: OPTIMIZE_FOR_SIZE);
|
||||
}
|
||||
|
||||
/* Return TRUE when BB should be optimized for size. */
|
||||
|
||||
bool
|
||||
|
@ -286,6 +296,16 @@ optimize_bb_for_speed_p (const_basic_block bb)
|
|||
return !optimize_bb_for_size_p (bb);
|
||||
}
|
||||
|
||||
/* Return the optimization type that should be used for block BB. */
|
||||
|
||||
optimization_type
|
||||
bb_optimization_type (const_basic_block bb)
|
||||
{
|
||||
return (optimize_bb_for_speed_p (bb)
|
||||
? OPTIMIZE_FOR_SPEED
|
||||
: OPTIMIZE_FOR_SIZE);
|
||||
}
|
||||
|
||||
/* Return TRUE when BB should be optimized for size. */
|
||||
|
||||
bool
|
||||
|
|
|
@ -54,8 +54,10 @@ extern bool probably_never_executed_bb_p (struct function *, const_basic_block);
|
|||
extern bool probably_never_executed_edge_p (struct function *, edge);
|
||||
extern bool optimize_function_for_size_p (struct function *);
|
||||
extern bool optimize_function_for_speed_p (struct function *);
|
||||
extern optimization_type function_optimization_type (struct function *);
|
||||
extern bool optimize_bb_for_size_p (const_basic_block);
|
||||
extern bool optimize_bb_for_speed_p (const_basic_block);
|
||||
extern optimization_type bb_optimization_type (const_basic_block);
|
||||
extern bool optimize_edge_for_size_p (edge);
|
||||
extern bool optimize_edge_for_speed_p (edge);
|
||||
extern bool optimize_insn_for_size_p (void);
|
||||
|
|
|
@ -3434,6 +3434,23 @@ move would be greater than that of a library call.",
|
|||
enum by_pieces_operation op, bool speed_p),
|
||||
default_use_by_pieces_infrastructure_p)
|
||||
|
||||
DEFHOOK
|
||||
(optab_supported_p,
|
||||
"Return true if the optimizers should use optab @var{op} with\n\
|
||||
modes @var{mode1} and @var{mode2} for optimization type @var{opt_type}.\n\
|
||||
The optab is known to have an associated @file{.md} instruction\n\
|
||||
whose C condition is true. @var{mode2} is only meaningful for conversion\n\
|
||||
optabs; for direct optabs it is a copy of @var{mode1}.\n\
|
||||
\n\
|
||||
For example, when called with @var{op} equal to @code{rint_optab} and\n\
|
||||
@var{mode1} equal to @code{DFmode}, the hook should say whether the\n\
|
||||
optimizers should use optab @code{rintdf2}.\n\
|
||||
\n\
|
||||
The default hook returns true for all inputs.",
|
||||
bool, (int op, machine_mode mode1, machine_mode mode2,
|
||||
optimization_type opt_type),
|
||||
default_optab_supported_p)
|
||||
|
||||
/* True for MODE if the target expects that registers in this mode will
|
||||
be allocated to registers in a small register class. The compiler is
|
||||
allowed to use registers explicitly used in the rtl as spill registers
|
||||
|
|
|
@ -1953,4 +1953,12 @@ can_use_doloop_if_innermost (const widest_int &, const widest_int &,
|
|||
return loop_depth == 1;
|
||||
}
|
||||
|
||||
/* Default implementation of TARGET_OPTAB_SUPPORTED_P. */
|
||||
|
||||
bool
|
||||
default_optab_supported_p (int, machine_mode, machine_mode, optimization_type)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#include "gt-targhooks.h"
|
||||
|
|
|
@ -250,4 +250,7 @@ extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE
|
|||
tree type ATTRIBUTE_UNUSED,
|
||||
int *pretend_arg_size ATTRIBUTE_UNUSED,
|
||||
int second_time ATTRIBUTE_UNUSED);
|
||||
extern bool default_optab_supported_p (int, machine_mode, machine_mode,
|
||||
optimization_type);
|
||||
|
||||
#endif /* GCC_TARGHOOKS_H */
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2015-12-02 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/i386/pr68432-1.c: New test.
|
||||
* gcc.target/i386/pr68432-2.c: Likewise.
|
||||
* gcc.target/i386/pr68432-3.c: Likewise.
|
||||
|
||||
2015-12-02 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/s390/zvector/vec-splat-2.c: New test.
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -msse2 -mfpmath=sse" } */
|
||||
|
||||
/* Single-precision rint.  The dg-final directives at the end of this
   test expect exactly one "ucomiss" in the generated assembly.  */
float
|
||||
f1 (float f)
|
||||
{
|
||||
return __builtin_rintf (f);
|
||||
}
|
||||
|
||||
/* Double-precision rint.  The dg-final directives at the end of this
   test expect exactly one "ucomisd" in the generated assembly.  */
double
|
||||
f2 (double f)
|
||||
{
|
||||
return __builtin_rint (f);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "\tucomiss\t" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "\tucomisd\t" 1 } } */
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Os -fno-math-errno -fno-trapping-math -msse2 -mfpmath=sse" } */
|
||||
|
||||
/* Single-precision rint, compiled with -Os by this test's dg-options.
   The dg-final directives at the end of this test expect no "ucomiss"
   in the generated assembly.  */
float
|
||||
f1 (float f)
|
||||
{
|
||||
return __builtin_rintf (f);
|
||||
}
|
||||
|
||||
/* Double-precision rint, compiled with -Os by this test's dg-options.
   The dg-final directives at the end of this test expect no "ucomisd"
   in the generated assembly.  */
double
|
||||
f2 (double f)
|
||||
{
|
||||
return __builtin_rint (f);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\tucomiss\t" } } */
|
||||
/* { dg-final { scan-assembler-not "\tucomisd\t" } } */
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -msse2 -mfpmath=sse" } */
|
||||
|
||||
/* Single-precision rint in a function marked cold, compiled with -O2.
   The dg-final directives at the end of this test expect no "ucomiss"
   in the generated assembly.
   NOTE(review): presumably the cold attribute makes the function count
   as optimized for size -- confirm.  */
float __attribute__ ((cold))
|
||||
f1 (float f)
|
||||
{
|
||||
return __builtin_rintf (f);
|
||||
}
|
||||
|
||||
/* Double-precision rint in a function marked cold, compiled with -O2.
   The dg-final directives at the end of this test expect no "ucomisd"
   in the generated assembly.
   NOTE(review): presumably the cold attribute makes the function count
   as optimized for size -- confirm.  */
double __attribute__ ((cold))
|
||||
f2 (double f)
|
||||
{
|
||||
return __builtin_rint (f);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\tucomiss\t" } } */
|
||||
/* { dg-final { scan-assembler-not "\tucomisd\t" } } */
|
|
@ -1056,7 +1056,9 @@ vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
&& real_equal (&TREE_REAL_CST (exp), &dconsthalf))
|
||||
{
|
||||
*type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
|
||||
if (*type_in && direct_internal_fn_supported_p (IFN_SQRT, *type_in))
|
||||
if (*type_in
|
||||
&& direct_internal_fn_supported_p (IFN_SQRT, *type_in,
|
||||
OPTIMIZE_FOR_SPEED))
|
||||
{
|
||||
gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
|
||||
var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
|
||||
|
|
|
@ -1681,7 +1681,8 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
|
|||
{
|
||||
tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
|
||||
tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
|
||||
if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1)))
|
||||
if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
|
||||
OPTIMIZE_FOR_SPEED))
|
||||
return ifn;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11118,7 +11118,8 @@ maybe_build_call_expr_loc (location_t loc, combined_fn fn, tree type,
|
|||
if (direct_internal_fn_p (ifn))
|
||||
{
|
||||
tree_pair types = direct_internal_fn_types (ifn, type, argarray);
|
||||
if (!direct_internal_fn_supported_p (ifn, types))
|
||||
if (!direct_internal_fn_supported_p (ifn, types,
|
||||
OPTIMIZE_FOR_BOTH))
|
||||
return NULL_TREE;
|
||||
}
|
||||
return build_call_expr_internal_loc_array (loc, ifn, type, n, argarray);
|
||||
|
|
Loading…
Reference in New Issue