tree.def (FMA_EXPR): New tree code.

2010-11-04  Richard Guenther  <rguenther@suse.de>
	Richard Henderson  <rth@redhat.com>

	* tree.def (FMA_EXPR): New tree code.
	* expr.c (expand_expr_real_2): Add FMA_EXPR expansion code.
	* gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS.
	* tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types.
	* tree-inline.c (estimate_operator_cost): Handle FMA_EXPR.
	* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
	* tree-ssa-math-opts.c (convert_mult_to_fma): New function.
	(execute_optimize_widening_mul): Call it.  Reorganize to allow
	dead stmt removal.  Move TODO flags ...
	(pass_optimize_widening_mul): ... here.
	* flag-types.h (enum fp_contract_mode): New enum.
	* common.opt (flag_fp_contract_mode): New variable.
	(-ffp-contract): New option.
	* opts.c (common_handle_option): Handle it.
	* doc/invoke.texi (-ffp-contract): Document.
	* tree.h (fold_fma): Declare.
	* builtins.c (fold_fma): New function.
	(fold_builtin_fma): Likewise.
	(fold_builtin_3): Call it for fma.
	* fold-const.c (fold_ternary_loc): Fold FMA_EXPR.
	* optabs.c (optab_for_tree_code): Handle FMA_EXPR.
	* config/i386/sse.md (fms<mode>4, fnma<mode>4, fnms<mode>4):
	New expanders.
	* doc/md.texi (fms<mode>4, fnma<mode>4, fnms<mode>4): Document new
	named patterns.
	* genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab.
	* optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms.
	(fms_optab, fnma_optab, fnms_optab): New defines.
	* gimplify.c (gimplify_expr): Handle binary truth expressions
	explicitly.  Handle FMA_EXPR.
	* tree-vect-stmts.c (vectorizable_operation): Handle ternary
	operations.

	* gcc.target/i386/fma4-vector-2.c: New testcase.

Co-Authored-By: Richard Henderson <rth@redhat.com>

From-SVN: r166304
This commit is contained in:
Richard Guenther 2010-11-04 10:56:22 +00:00 committed by Richard Biener
parent 47853c7300
commit 1694907238
24 changed files with 524 additions and 42 deletions

View File

@ -1,3 +1,39 @@
2010-11-04 Richard Guenther <rguenther@suse.de>
Richard Henderson <rth@redhat.com>
* tree.def (FMA_EXPR): New tree code.
* expr.c (expand_expr_real_2): Add FMA_EXPR expansion code.
* gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS.
* tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types.
* tree-inline.c (estimate_operator_cost): Handle FMA_EXPR.
* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
* tree-ssa-math-opts.c (convert_mult_to_fma): New function.
(execute_optimize_widening_mul): Call it. Reorganize to allow
dead stmt removal. Move TODO flags ...
(pass_optimize_widening_mul): ... here.
* flag-types.h (enum fp_contract_mode): New enum.
* common.opt (flag_fp_contract_mode): New variable.
(-ffp-contract): New option.
* opts.c (common_handle_option): Handle it.
* doc/invoke.texi (-ffp-contract): Document.
* tree.h (fold_fma): Declare.
* builtins.c (fold_fma): New function.
(fold_builtin_fma): Likewise.
(fold_builtin_3): Call it for fma.
* fold-const.c (fold_ternary_loc): Fold FMA_EXPR.
* optabs.c (optab_for_tree_code): Handle FMA_EXPR.
* config/i386/sse.md (fms<mode>4, fnma<mode>4, fnms<mode>4):
New expanders.
* doc/md.texi (fms<mode>4, fnma<mode>4, fnms<mode>4): Document new
named patterns.
* genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab.
* optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms.
(fms_optab, fnma_optab, fnms_optab): New defines.
* gimplify.c (gimplify_expr): Handle binary truth expressions
explicitly. Handle FMA_EXPR.
* tree-vect-stmts.c (vectorizable_operation): Handle ternary
operations.
2010-11-04 Artjoms Sinkarovs <artyom.shinakroff@gmail.com>
Richard Guenther <rguenther@suse.de>

View File

@ -9266,6 +9266,40 @@ fold_builtin_abs (location_t loc, tree arg, tree type)
return fold_build1_loc (loc, ABS_EXPR, type, arg);
}
/* Try to fold the fused multiply-add of ARG0, ARG1 and ARG2 with result
   type TYPE.  LOC is accepted for interface symmetry but is not used.

   Folding is only attempted when all three operands are REAL_CST nodes;
   in that case the result of do_mpfr_arg3 with mpfr_fma is returned.
   Otherwise NULL_TREE is returned.  */
tree
fold_fma (location_t loc ATTRIBUTE_UNUSED,
          tree type, tree arg0, tree arg1, tree arg2)
{
  bool all_real_csts = (TREE_CODE (arg0) == REAL_CST
                        && TREE_CODE (arg1) == REAL_CST
                        && TREE_CODE (arg2) == REAL_CST);

  /* Nothing to do unless every operand is a real constant.  */
  if (!all_real_csts)
    return NULL_TREE;

  return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
}
/* Fold a call to fma, fmaf, or fmal with arguments ARG0, ARG1 and ARG2.
   TYPE is handed on to fold_fma and to the FMA_EXPR that may be built.

   Returns NULL_TREE when any argument fails validation as REAL_TYPE,
   or when neither constant folding nor building an FMA_EXPR applies.  */
static tree
fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
{
  tree folded;

  if (!validate_arg (arg0, REAL_TYPE)
      || !validate_arg (arg1, REAL_TYPE)
      || !validate_arg (arg2, REAL_TYPE))
    return NULL_TREE;

  /* Constant operands are folded first.  */
  folded = fold_fma (loc, type, arg0, arg1, arg2);
  if (folded)
    return folded;

  /* ??? Only expand to FMA_EXPR if it's directly supported.  */
  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;

  return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
}
/* Fold a call to builtin fmin or fmax. */
static tree
@ -10540,10 +10574,7 @@ fold_builtin_3 (location_t loc, tree fndecl,
return fold_builtin_sincos (loc, arg0, arg1, arg2);
CASE_FLT_FN (BUILT_IN_FMA):
if (validate_arg (arg0, REAL_TYPE)
&& validate_arg(arg1, REAL_TYPE)
&& validate_arg(arg2, REAL_TYPE))
return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
return fold_builtin_fma (loc, arg0, arg1, arg2, type);
break;
CASE_FLT_FN (BUILT_IN_REMQUO):

View File

@ -58,6 +58,10 @@ bool flag_warn_unused_result = false
Variable
int *param_values
; Floating-point contraction mode, fast by default.
Variable
enum fp_contract_mode flag_fp_contract_mode = FP_CONTRACT_FAST
###
Driver
@ -857,6 +861,10 @@ fforward-propagate
Common Report Var(flag_forward_propagate) Optimization
Perform a forward propagation pass on RTL
ffp-contract=
Common Joined RejectNegative
-ffp-contract=[off|on|fast] Perform floating-point expression contraction.
; Nonzero means don't put addresses of constant functions in registers.
; Used for compiling the Unix kernel, where strange substitutions are
; done on the assembly output.

View File

@ -1859,7 +1859,7 @@
;; Intrinsic FMA operations.
;; The standard name for fma is only available with SSE math enabled.
;; The standard names for fma are only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
@ -1869,6 +1869,33 @@
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
;; Fused multiply-subtract: operand 0 = operand 1 * operand 2 - operand 3,
;; written as an fma whose third input is (neg operand 3).
;; Available when FMA or FMA4 is enabled together with SSE math.
(define_expand "fms<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand")
(match_operand:FMAMODE 2 "nonimmediate_operand")
(neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
;; Negated fused multiply-add: operand 0 = (-operand 1) * operand 2 + operand 3,
;; written as an fma whose first input is (neg operand 1).
;; Available when FMA or FMA4 is enabled together with SSE math.
(define_expand "fnma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
(match_operand:FMAMODE 2 "nonimmediate_operand")
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
;; Negated fused multiply-subtract:
;; operand 0 = (-operand 1) * operand 2 - operand 3, written as an fma whose
;; first and third inputs are both wrapped in neg.
;; Available when FMA or FMA4 is enabled together with SSE math.
(define_expand "fnms<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
(match_operand:FMAMODE 2 "nonimmediate_operand")
(neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")

View File

@ -343,7 +343,7 @@ Objective-C and Objective-C++ Dialects}.
-fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
-ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
-fforward-propagate -ffunction-sections @gol
-fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
-fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
-finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
@ -5992,6 +5992,18 @@ loop unrolling.
This option is enabled by default at optimization levels @option{-O},
@option{-O2}, @option{-O3}, @option{-Os}.
@item -ffp-contract=@var{style}
@opindex ffp-contract
@option{-ffp-contract=off} disables floating-point expression contraction.
@option{-ffp-contract=fast} enables floating-point expression contraction,
such as the forming of fused multiply-add operations, if the target has
native support for them.
@option{-ffp-contract=on} enables floating-point expression contraction
if allowed by the language standard. This is currently not implemented
and is treated the same as @option{-ffp-contract=off}.
The default is @option{-ffp-contract=fast}.
@item -fomit-frame-pointer
@opindex fomit-frame-pointer
Don't keep the frame pointer in a register for functions that

View File

@ -3958,6 +3958,36 @@ pattern is used to implement the @code{fma}, @code{fmaf}, and
multiply followed by the add if the machine does not perform a
rounding step between the operations.
@cindex @code{fms@var{m}4} instruction pattern
@item @samp{fms@var{m}4}
Like @code{fma@var{m}4}, except operand 3 is subtracted from the
product instead of added to the product. This is represented
in the rtl as
@smallexample
(fma:@var{m} @var{op1} @var{op2} (neg:@var{m} @var{op3}))
@end smallexample
@cindex @code{fnma@var{m}4} instruction pattern
@item @samp{fnma@var{m}4}
Like @code{fma@var{m}4} except that the intermediate product
is negated before being added to operand 3. This is represented
in the rtl as
@smallexample
(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} @var{op3})
@end smallexample
@cindex @code{fnms@var{m}4} instruction pattern
@item @samp{fnms@var{m}4}
Like @code{fms@var{m}4} except that the intermediate product
is negated before subtracting operand 3. This is represented
in the rtl as
@smallexample
(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} (neg:@var{m} @var{op3}))
@end smallexample
@cindex @code{min@var{m}3} instruction pattern
@cindex @code{max@var{m}3} instruction pattern
@item @samp{smin@var{m}3}, @samp{smax@var{m}3}

View File

@ -7254,7 +7254,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
int ignore;
bool reduce_bit_field;
location_t loc = ops->location;
tree treeop0, treeop1;
tree treeop0, treeop1, treeop2;
#define REDUCE_BIT_FIELD(expr) (reduce_bit_field \
? reduce_to_bit_field_precision ((expr), \
target, \
@ -7267,6 +7267,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
treeop0 = ops->op0;
treeop1 = ops->op1;
treeop2 = ops->op2;
/* We should be called only on simple (binary or unary) expressions,
exactly those that are valid in gimple expressions that aren't
@ -7624,7 +7625,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
op2 = expand_normal (ops->op2);
op2 = expand_normal (treeop2);
target = expand_widen_pattern_expr (ops, op0, op1, op2,
target, unsignedp);
return target;
@ -7711,6 +7712,46 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
case FMA_EXPR:
{
optab opt = fma_optab;
gimple def0, def2;
def0 = get_def_for_expr (treeop0, NEGATE_EXPR);
def2 = get_def_for_expr (treeop2, NEGATE_EXPR);
op0 = op2 = NULL;
if (def0 && def2
&& optab_handler (fnms_optab, mode) != CODE_FOR_nothing)
{
opt = fnms_optab;
op0 = expand_normal (gimple_assign_rhs1 (def0));
op2 = expand_normal (gimple_assign_rhs1 (def2));
}
else if (def0
&& optab_handler (fnma_optab, mode) != CODE_FOR_nothing)
{
opt = fnma_optab;
op0 = expand_normal (gimple_assign_rhs1 (def0));
}
else if (def2
&& optab_handler (fms_optab, mode) != CODE_FOR_nothing)
{
opt = fms_optab;
op2 = expand_normal (gimple_assign_rhs1 (def2));
}
if (op0 == NULL)
op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL);
if (op2 == NULL)
op2 = expand_normal (treeop2);
op1 = expand_normal (treeop1);
return expand_ternary_op (TYPE_MODE (type), opt,
op0, op1, op2, target, 0);
}
case MULT_EXPR:
/* If this is a fixed-point operation, then we cannot use the code
below because "expand_mult" doesn't support sat/no-sat fixed-point

View File

@ -152,4 +152,11 @@ enum warn_strict_overflow_code
WARN_STRICT_OVERFLOW_MAGNITUDE = 5
};
/* Floating-point contraction mode, as selected by -ffp-contract=.
   The enumerators take the values 0, 1 and 2 in declaration order.  */
enum fp_contract_mode
{
  /* Contraction disabled.  */
  FP_CONTRACT_OFF,
  /* Contraction as allowed by the language standard.  */
  FP_CONTRACT_ON,
  /* Contraction whenever the target supports it.  */
  FP_CONTRACT_FAST
};
#endif /* ! GCC_FLAG_TYPES_H */

View File

@ -13281,10 +13281,10 @@ contains_label_p (tree st)
tree
fold_ternary_loc (location_t loc, enum tree_code code, tree type,
tree op0, tree op1, tree op2)
tree op0, tree op1, tree op2)
{
tree tem;
tree arg0 = NULL_TREE, arg1 = NULL_TREE;
tree arg0 = NULL_TREE, arg1 = NULL_TREE, arg2 = NULL_TREE;
enum tree_code_class kind = TREE_CODE_CLASS (code);
gcc_assert (IS_EXPR_CODE_CLASS (kind)
@ -13312,6 +13312,12 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
STRIP_NOPS (arg1);
}
if (op2)
{
arg2 = op2;
STRIP_NOPS (arg2);
}
switch (code)
{
case COMPONENT_REF:
@ -13610,6 +13616,17 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
return NULL_TREE;
case FMA_EXPR:
/* For integers we can decompose the FMA if possible. */
if (TREE_CODE (arg0) == INTEGER_CST
&& TREE_CODE (arg1) == INTEGER_CST)
return fold_build2_loc (loc, PLUS_EXPR, type,
const_binop (MULT_EXPR, arg0, arg1), arg2);
if (integer_zerop (arg2))
return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
return fold_fma (loc, type, arg0, arg1, arg2);
default:
return NULL_TREE;
} /* switch (code) */

View File

@ -160,6 +160,9 @@ static const char * const optabs[] =
"set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
"set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
"set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
"set_optab_handler (fms_optab, $A, CODE_FOR_$(fms$a4$))",
"set_optab_handler (fnma_optab, $A, CODE_FOR_$(fnma$a4$))",
"set_optab_handler (fnms_optab, $A, CODE_FOR_$(fnms$a4$))",
"set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
"set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
"set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",

View File

@ -400,6 +400,14 @@ dump_ternary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags)
pp_character (buffer, '>');
break;
case FMA_EXPR:
dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
pp_string (buffer, " * ");
dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
pp_string (buffer, " + ");
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
break;
default:
gcc_unreachable ();
}

View File

@ -2529,7 +2529,8 @@ get_gimple_rhs_num_ops (enum tree_code code)
|| (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \
: (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \
: ((SYM) == WIDEN_MULT_PLUS_EXPR \
|| (SYM) == WIDEN_MULT_MINUS_EXPR) ? GIMPLE_TERNARY_RHS \
|| (SYM) == WIDEN_MULT_MINUS_EXPR \
|| (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
|| (SYM) == OBJ_TYPE_REF \

View File

@ -7170,6 +7170,16 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
ret = gimplify_omp_atomic (expr_p, pre_p);
break;
case TRUTH_AND_EXPR:
case TRUTH_OR_EXPR:
case TRUTH_XOR_EXPR:
/* Classified as tcc_expression. */
goto expr_2;
case FMA_EXPR:
/* Classified as tcc_expression. */
goto expr_3;
case POINTER_PLUS_EXPR:
/* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
The second is gimple immediate saving a need for extra statement.
@ -7249,16 +7259,28 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
break;
}
expr_3:
{
enum gimplify_status r0, r1, r2;
r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
post_p, is_gimple_val, fb_rvalue);
r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
post_p, is_gimple_val, fb_rvalue);
r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
post_p, is_gimple_val, fb_rvalue);
ret = MIN (MIN (r0, r1), r2);
break;
}
case tcc_declaration:
case tcc_constant:
ret = GS_ALL_DONE;
goto dont_recalculate;
default:
gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
|| TREE_CODE (*expr_p) == TRUTH_OR_EXPR
|| TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
goto expr_2;
gcc_unreachable ();
}
recalculate_side_effects (*expr_p);

View File

@ -374,6 +374,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
: (TYPE_SATURATING (type)
? ssmsub_widen_optab : smsub_widen_optab));
case FMA_EXPR:
return fma_optab;
case REDUC_MAX_EXPR:
return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;

View File

@ -192,6 +192,9 @@ enum optab_index
OTI_atan2,
/* Floating multiply/add */
OTI_fma,
OTI_fms,
OTI_fnma,
OTI_fnms,
/* Move instruction. */
OTI_mov,
@ -435,6 +438,9 @@ enum optab_index
#define pow_optab (&optab_table[OTI_pow])
#define atan2_optab (&optab_table[OTI_atan2])
#define fma_optab (&optab_table[OTI_fma])
#define fms_optab (&optab_table[OTI_fms])
#define fnma_optab (&optab_table[OTI_fnma])
#define fnms_optab (&optab_table[OTI_fnms])
#define mov_optab (&optab_table[OTI_mov])
#define movstrict_optab (&optab_table[OTI_movstrict])

View File

@ -1901,6 +1901,18 @@ common_handle_option (struct gcc_options *opts,
return false;
break;
case OPT_ffp_contract_:
if (!strcmp (arg, "on"))
/* Not implemented, fall back to conservative FP_CONTRACT_OFF. */
flag_fp_contract_mode = FP_CONTRACT_OFF;
else if (!strcmp (arg, "off"))
flag_fp_contract_mode = FP_CONTRACT_OFF;
else if (!strcmp (arg, "fast"))
flag_fp_contract_mode = FP_CONTRACT_FAST;
else
error ("unknown floating point contraction style \"%s\"", arg);
break;
case OPT_fexcess_precision_:
if (!strcmp (arg, "fast"))
flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;

View File

@ -1,3 +1,8 @@
2010-11-04 Richard Guenther <rguenther@suse.de>
Richard Henderson <rth@redhat.com>
* gcc.target/i386/fma4-vector-2.c: New testcase.
2010-11-04 Artjoms Sinkarovs <artyom.shinakroff@gmail.com>
Richard Guenther <rguenther@suse.de>

View File

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
float r[256], s[256];
float x[256];
float y[256];
float z[256];
/* Each iteration computes one multiply-subtract and one multiply-add from
   the same inputs.  The scan-assembler directives at the end of this file
   expect both a vfmsubps and a vfmaddps instruction in the output.  */
void foo (void)
{
int i;
for (i = 0; i < 256; ++i)
{
/* Product minus addend: candidate for a fused multiply-subtract.  */
r[i] = x[i] * y[i] - z[i];
/* Product plus addend: candidate for a fused multiply-add.  */
s[i] = x[i] * y[i] + z[i];
}
}
/* { dg-final { scan-assembler "vfmaddps" } } */
/* { dg-final { scan-assembler "vfmsubps" } } */

View File

@ -3655,6 +3655,20 @@ verify_gimple_assign_ternary (gimple stmt)
}
break;
case FMA_EXPR:
if (!useless_type_conversion_p (lhs_type, rhs1_type)
|| !useless_type_conversion_p (lhs_type, rhs2_type)
|| !useless_type_conversion_p (lhs_type, rhs3_type))
{
error ("type mismatch in fused multiply-add expression");
debug_generic_expr (lhs_type);
debug_generic_expr (rhs1_type);
debug_generic_expr (rhs2_type);
debug_generic_expr (rhs3_type);
return true;
}
break;
default:
gcc_unreachable ();
}

View File

@ -3283,6 +3283,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
case POINTER_PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
case FMA_EXPR:
case ADDR_SPACE_CONVERT_EXPR:
case FIXED_CONVERT_EXPR:

View File

@ -1494,6 +1494,123 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
return true;
}
/* Combine the multiplication at MUL_STMT with uses in additions and
subtractions to form fused multiply-add operations. Returns true
if successful and MUL_STMT should be removed.

The function works in two phases. The first loop only inspects the
uses of the multiplication result and returns false, without changing
anything, as soon as one use cannot be converted. The second loop
then replaces every use statement by an FMA_EXPR assignment. */
static bool
convert_mult_to_fma (gimple mul_stmt)
{
tree mul_result = gimple_assign_lhs (mul_stmt);
tree type = TREE_TYPE (mul_result);
gimple use_stmt, fma_stmt;
use_operand_p use_p;
imm_use_iterator imm_iter;
/* Floating-point contraction has been switched off. */
if (FLOAT_TYPE_P (type)
&& flag_fp_contract_mode == FP_CONTRACT_OFF)
return false;
/* We don't want to do bitfield reduction ops. */
if (INTEGRAL_TYPE_P (type)
&& (TYPE_PRECISION (type)
!= GET_MODE_PRECISION (TYPE_MODE (type))))
return false;
/* If the target doesn't support it, don't generate it. We assume that
if fma isn't available then fms, fnma or fnms are not either. */
if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
return false;
/* Make sure that the multiplication statement becomes dead after
the transformation, thus that all uses are transformed to FMAs.
This means we assume that an FMA operation has the same cost
as an addition. */
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
{
enum tree_code use_code;
use_stmt = USE_STMT (use_p);
/* NOTE(review): every use that is not an assignment rejects the
transformation; presumably this includes debug statements, so confirm
that code generation cannot differ between -g and -g0 here. */
if (!is_gimple_assign (use_stmt))
return false;
use_code = gimple_assign_rhs_code (use_stmt);
/* ??? We need to handle NEGATE_EXPR to eventually form fnms. */
if (use_code != PLUS_EXPR
&& use_code != MINUS_EXPR)
return false;
/* For now restrict this operation to a single basic block. In theory
we would want to support sinking the multiplication in
m = a*b;
if ()
ma = m + c;
else
d = m;
to form a fma in the then block and sink the multiplication to the
else block. */
if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
return false;
/* We can't handle a * b + a * b. */
if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
return false;
/* If the target doesn't support a * b - c then drop the ball. */
if (gimple_assign_rhs1 (use_stmt) == mul_result
&& use_code == MINUS_EXPR
&& optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
return false;
/* If the target doesn't support -a * b + c then drop the ball. */
if (gimple_assign_rhs2 (use_stmt) == mul_result
&& use_code == MINUS_EXPR
&& optab_handler (fnma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
return false;
/* We don't generate -a * b - c below yet. */
}
/* Every use passed the checks above; rewrite each one into an FMA_EXPR
that takes over the left-hand side of the original use statement. */
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
{
tree addop, mulop1;
gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
mulop1 = gimple_assign_rhs1 (mul_stmt);
if (gimple_assign_rhs1 (use_stmt) == mul_result)
{
/* The product is the first operand, so the addend is the second. */
addop = gimple_assign_rhs2 (use_stmt);
/* a * b - c -> a * b + (-c) */
if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
addop = force_gimple_operand_gsi (&gsi,
build1 (NEGATE_EXPR,
type, addop),
true, NULL_TREE, true,
GSI_SAME_STMT);
}
else
{
/* The product is the second operand, so the addend is the first. */
addop = gimple_assign_rhs1 (use_stmt);
/* a - b * c -> (-b) * c + a */
if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
mulop1 = force_gimple_operand_gsi (&gsi,
build1 (NEGATE_EXPR,
type, mulop1),
true, NULL_TREE, true,
GSI_SAME_STMT);
}
fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
gimple_assign_lhs (use_stmt),
mulop1,
gimple_assign_rhs2 (mul_stmt),
addop);
gsi_replace (&gsi, fma_stmt, true);
}
return true;
}
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
where appropriate. */
@ -1501,31 +1618,45 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
static unsigned int
execute_optimize_widening_mul (void)
{
bool changed = false;
basic_block bb;
FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple stmt = gsi_stmt (gsi);
enum tree_code code;
if (!is_gimple_assign (stmt))
continue;
if (is_gimple_assign (stmt))
{
code = gimple_assign_rhs_code (stmt);
switch (code)
{
case MULT_EXPR:
if (!convert_mult_to_widen (stmt)
&& convert_mult_to_fma (stmt))
{
gsi_remove (&gsi, true);
release_defs (stmt);
continue;
}
break;
code = gimple_assign_rhs_code (stmt);
if (code == MULT_EXPR)
changed |= convert_mult_to_widen (stmt);
else if (code == PLUS_EXPR || code == MINUS_EXPR)
changed |= convert_plusminus_to_widen (&gsi, stmt, code);
case PLUS_EXPR:
case MINUS_EXPR:
convert_plusminus_to_widen (&gsi, stmt, code);
break;
default:;
}
}
gsi_next (&gsi);
}
}
return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
| TODO_verify_stmts : 0);
return 0;
}
static bool
@ -1549,6 +1680,9 @@ struct gimple_opt_pass pass_optimize_widening_mul =
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0 /* todo_flags_finish */
TODO_verify_ssa
| TODO_verify_stmts
| TODO_dump_func
| TODO_update_ssa /* todo_flags_finish */
}
};

View File

@ -2343,7 +2343,8 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
/* Function vectorizable_operation.
Check if STMT performs a binary or unary operation that can be vectorized.
Check if STMT performs a binary, unary or ternary operation that can
be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
@ -2354,7 +2355,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
{
tree vec_dest;
tree scalar_dest;
tree op0, op1 = NULL;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@ -2366,7 +2367,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
int icode;
tree def;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
enum vect_def_type dt[3]
= {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
gimple new_stmt = NULL;
stmt_vec_info prev_stmt_info;
int nunits_in;
@ -2374,8 +2376,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vectype_out;
int ncopies;
int j, i;
VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
tree vop0, vop1;
VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
@ -2401,10 +2403,11 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* Support only unary, binary or ternary operations. */
op_type = TREE_CODE_LENGTH (code);
if (op_type != unary_op && op_type != binary_op)
if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
op_type);
return false;
}
@ -2441,7 +2444,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (nunits_out != nunits_in)
return false;
if (op_type == binary_op)
if (op_type == binary_op || op_type == ternary_op)
{
op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
@ -2452,6 +2455,17 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
return false;
}
}
if (op_type == ternary_op)
{
op2 = gimple_assign_rhs3 (stmt);
if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
&dt[2]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
return false;
}
}
if (loop_vinfo)
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@ -2473,7 +2487,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
|| code == RROTATE_EXPR)
return false;
optab = optab_for_tree_code (code, vectype, optab_default);
optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */
if (!optab)
@ -2534,8 +2548,10 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (!slp_node)
{
vec_oprnds0 = VEC_alloc (tree, heap, 1);
if (op_type == binary_op)
if (op_type == binary_op || op_type == ternary_op)
vec_oprnds1 = VEC_alloc (tree, heap, 1);
if (op_type == ternary_op)
vec_oprnds2 = VEC_alloc (tree, heap, 1);
}
/* In case the vectorization factor (VF) is bigger than the number
@ -2597,22 +2613,40 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* Handle uses. */
if (j == 0)
{
if (op_type == binary_op)
if (op_type == binary_op || op_type == ternary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
slp_node);
else
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
slp_node);
if (op_type == ternary_op)
{
vec_oprnds2 = VEC_alloc (tree, heap, 1);
VEC_quick_push (tree, vec_oprnds2,
vect_get_vec_def_for_operand (op2, stmt, NULL));
}
}
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
{
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
if (op_type == ternary_op)
{
tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
VEC_quick_push (tree, vec_oprnds2,
vect_get_vec_def_for_stmt_copy (dt[2],
vec_oprnd));
}
}
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
{
vop1 = ((op_type == binary_op)
? VEC_index (tree, vec_oprnds1, i) : NULL);
new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
vop1 = ((op_type == binary_op || op_type == ternary_op)
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
@ -2633,6 +2667,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
VEC_free (tree, heap, vec_oprnds0);
if (vec_oprnds1)
VEC_free (tree, heap, vec_oprnds1);
if (vec_oprnds2)
VEC_free (tree, heap, vec_oprnds2);
return true;
}

View File

@ -1092,6 +1092,12 @@ DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
is subtracted from t3. */
DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
/* Fused multiply-add.
All operands and the result are of the same type. The value is
operand one times operand two plus operand three, and no intermediate
rounding is performed after the multiplication and before the
addition. */
DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
/* Whole vector left/right shift in bits.
Operand 0 is a vector to be shifted.
Operand 1 is an integer shift amount in bits. */

View File

@ -4954,6 +4954,7 @@ extern void fold_defer_overflow_warnings (void);
extern void fold_undefer_overflow_warnings (bool, const_gimple, int);
extern void fold_undefer_and_ignore_overflow_warnings (void);
extern bool fold_deferring_overflow_warnings_p (void);
/* Defined in builtins.c.  Arguments are the location, the result type and
   the three operands of the fused multiply-add, in that order.  */
extern tree fold_fma (location_t, tree, tree, tree, tree);
enum operand_equal_flag
{