Add IFN_COND_FMA functions
This patch adds conditional equivalents of the IFN_FMA built-in functions. Most of it is just a mechanical extension of the binary stuff. 2018-07-12 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * doc/md.texi (cond_fma, cond_fms, cond_fnma, cond_fnms): Document. * optabs.def (cond_fma_optab, cond_fms_optab, cond_fnma_optab) (cond_fnms_optab): New optabs. * internal-fn.def (COND_FMA, COND_FMS, COND_FNMA, COND_FNMS): New internal functions. (FMA): Use DEF_INTERNAL_FLT_FN rather than DEF_INTERNAL_FLT_FLOATN_FN. * internal-fn.h (get_conditional_internal_fn): Declare. (get_unconditional_internal_fn): Likewise. * internal-fn.c (cond_ternary_direct): New macro. (expand_cond_ternary_optab_fn): Likewise. (direct_cond_ternary_optab_supported_p): Likewise. (FOR_EACH_COND_FN_PAIR): Likewise. (get_conditional_internal_fn): New function. (get_unconditional_internal_fn): Likewise. * gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 5. (gimple_match_op::gimple_match_op): Add a new overload for 5 operands. (gimple_match_op::set_op): Likewise. (gimple_resimplify5): Declare. * genmatch.c (decision_tree::gen): Generate simplifications for 5 operands. * gimple-match-head.c (gimple_simplify): Define an overload for 5 operands. Handle calls with 5 arguments in the top-level overload. (convert_conditional_op): Handle conversions from unconditional internal functions to conditional ones. (gimple_resimplify5): New function. (build_call_internal): Pass a fifth operand. (maybe_push_res_to_seq): Likewise. (try_conditional_simplification): Try converting conditional internal functions to unconditional internal functions. Handle 3-operand unconditional forms. * match.pd (UNCOND_TERNARY, COND_TERNARY): Operator lists. Define ternary equivalents of the current rules for binary conditional internal functions. * config/aarch64/aarch64.c (aarch64_preferred_else_value): Handle ternary operations. 
* config/aarch64/iterators.md (UNSPEC_COND_FMLA, UNSPEC_COND_FMLS) (UNSPEC_COND_FNMLA, UNSPEC_COND_FNMLS): New unspecs. (optab): Handle them. (SVE_COND_FP_TERNARY): New int iterator. (sve_fmla_op, sve_fmad_op): New int attributes. * config/aarch64/aarch64-sve.md (cond_<optab><mode>) (*cond_<optab><mode>_2, *cond_<optab><mode_4) (*cond_<optab><mode>_any): New SVE_COND_FP_TERNARY patterns. gcc/testsuite/ * gcc.dg/vect/vect-cond-arith-3.c: New test. * gcc.target/aarch64/sve/vcond_13.c: Likewise. * gcc.target/aarch64/sve/vcond_13_run.c: Likewise. * gcc.target/aarch64/sve/vcond_14.c: Likewise. * gcc.target/aarch64/sve/vcond_14_run.c: Likewise. * gcc.target/aarch64/sve/vcond_15.c: Likewise. * gcc.target/aarch64/sve/vcond_15_run.c: Likewise. * gcc.target/aarch64/sve/vcond_16.c: Likewise. * gcc.target/aarch64/sve/vcond_16_run.c: Likewise. From-SVN: r262587
This commit is contained in:
parent
6a86928d98
commit
b41d1f6ed7
|
@ -1,3 +1,50 @@
|
|||
2018-07-12 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* doc/md.texi (cond_fma, cond_fms, cond_fnma, cond_fnms): Document.
|
||||
* optabs.def (cond_fma_optab, cond_fms_optab, cond_fnma_optab)
|
||||
(cond_fnms_optab): New optabs.
|
||||
* internal-fn.def (COND_FMA, COND_FMS, COND_FNMA, COND_FNMS): New
|
||||
internal functions.
|
||||
(FMA): Use DEF_INTERNAL_FLT_FLOATN_FN rather than DEF_INTERNAL_FLT_FN.
|
||||
* internal-fn.h (get_conditional_internal_fn): Declare.
|
||||
(get_unconditional_internal_fn): Likewise.
|
||||
* internal-fn.c (cond_ternary_direct): New macro.
|
||||
(expand_cond_ternary_optab_fn): Likewise.
|
||||
(direct_cond_ternary_optab_supported_p): Likewise.
|
||||
(FOR_EACH_COND_FN_PAIR): Likewise.
|
||||
(get_conditional_internal_fn): New function.
|
||||
(get_unconditional_internal_fn): Likewise.
|
||||
* gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 5.
|
||||
(gimple_match_op::gimple_match_op): Add a new overload for 5
|
||||
operands.
|
||||
(gimple_match_op::set_op): Likewise.
|
||||
(gimple_resimplify5): Declare.
|
||||
* genmatch.c (decision_tree::gen): Generate simplifications for
|
||||
5 operands.
|
||||
* gimple-match-head.c (gimple_simplify): Define an overload for
|
||||
5 operands. Handle calls with 5 arguments in the top-level overload.
|
||||
(convert_conditional_op): Handle conversions from unconditional
|
||||
internal functions to conditional ones.
|
||||
(gimple_resimplify5): New function.
|
||||
(build_call_internal): Pass a fifth operand.
|
||||
(maybe_push_res_to_seq): Likewise.
|
||||
(try_conditional_simplification): Try converting conditional
|
||||
internal functions to unconditional internal functions.
|
||||
Handle 3-operand unconditional forms.
|
||||
* match.pd (UNCOND_TERNARY, COND_TERNARY): Operator lists.
|
||||
Define ternary equivalents of the current rules for binary conditional
|
||||
internal functions.
|
||||
* config/aarch64/aarch64.c (aarch64_preferred_else_value): Handle
|
||||
ternary operations.
|
||||
* config/aarch64/iterators.md (UNSPEC_COND_FMLA, UNSPEC_COND_FMLS)
|
||||
(UNSPEC_COND_FNMLA, UNSPEC_COND_FNMLS): New unspecs.
|
||||
(optab): Handle them.
|
||||
(SVE_COND_FP_TERNARY): New int iterator.
|
||||
(sve_fmla_op, sve_fmad_op): New int attributes.
|
||||
* config/aarch64/aarch64-sve.md (cond_<optab><mode>)
|
||||
(*cond_<optab><mode>_2, *cond_<optab><mode>_4)
|
||||
(*cond_<optab><mode>_any): New SVE_COND_FP_TERNARY patterns.
|
||||
|
||||
2018-07-12 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* target.def (preferred_else_value): New target hook.
|
||||
|
|
|
@ -2906,6 +2906,101 @@
|
|||
UNSPEC_SEL))]
|
||||
)
|
||||
|
||||
;; Predicated floating-point ternary operations with select.
;; Operand 1 is the predicate, operands 2, 3 and 4 are the inputs to the
;; SVE_COND_FP_TERNARY operation and operand 5 (a register or zero) is the
;; other value that UNSPEC_SEL selects from.  The preparation code moves a
;; fallback value that equals operand 3 into operand 2 by swapping the two
;; operands.
|
||||
(define_expand "cond_<optab><mode>"
|
||||
[(set (match_operand:SVE_F 0 "register_operand")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:<VPRED> 1 "register_operand")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:SVE_F 2 "register_operand")
|
||||
(match_operand:SVE_F 3 "register_operand")
|
||||
(match_operand:SVE_F 4 "register_operand")]
|
||||
SVE_COND_FP_TERNARY)
|
||||
(match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
|
||||
UNSPEC_SEL))]
|
||||
"TARGET_SVE"
|
||||
{
|
||||
/* Swap the multiplication operands if the fallback value is the
|
||||
second of the two. */
|
||||
if (rtx_equal_p (operands[3], operands[5]))
|
||||
std::swap (operands[2], operands[3]);
|
||||
})
|
||||
|
||||
;; Predicated floating-point ternary operations using the FMAD-like form.
;; The fallback value is operand 2 itself (match_dup 2).  In the first
;; alternative operand 2 is tied to the output, so a single <sve_fmad_op>
;; is emitted.  In the second alternative operand 2 is first copied into
;; the output with MOVPRFX and the same <sve_fmad_op> follows.
|
||||
(define_insn "*cond_<optab><mode>_2"
|
||||
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:SVE_F 2 "register_operand" "0, w")
|
||||
(match_operand:SVE_F 3 "register_operand" "w, w")
|
||||
(match_operand:SVE_F 4 "register_operand" "w, w")]
|
||||
SVE_COND_FP_TERNARY)
|
||||
(match_dup 2)]
|
||||
UNSPEC_SEL))]
|
||||
"TARGET_SVE"
|
||||
"@
|
||||
<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
|
||||
movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
|
||||
[(set_attr "movprfx" "*,yes")]
|
||||
)
|
||||
|
||||
;; Predicated floating-point ternary operations using the FMLA-like form.
;; The fallback value is operand 4 itself (match_dup 4).  In the first
;; alternative operand 4 is tied to the output, so a single <sve_fmla_op>
;; is emitted.  In the second alternative operand 4 is first copied into
;; the output with MOVPRFX.  The output then holds operand 4 in both
;; alternatives, so both must use <sve_fmla_op> with operands 2 and 3;
;; <sve_fmad_op> is only used when the output holds operand 2.
|
||||
(define_insn "*cond_<optab><mode>_4"
|
||||
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:SVE_F 2 "register_operand" "w, w")
|
||||
(match_operand:SVE_F 3 "register_operand" "w, w")
|
||||
(match_operand:SVE_F 4 "register_operand" "0, w")]
|
||||
SVE_COND_FP_TERNARY)
|
||||
(match_dup 4)]
|
||||
UNSPEC_SEL))]
|
||||
"TARGET_SVE"
|
||||
"@
|
||||
<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||
movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
|
||||
[(set_attr "movprfx" "*,yes")]
|
||||
)
|
||||
|
||||
;; Predicated floating-point ternary operations in which the value for
|
||||
;; inactive lanes is distinct from the other inputs.
;; The insn condition rejects a fallback (operand 5) equal to operand 2,
;; 3 or 4.  The three alternatives for operand 5 are:
;;   Dz - a MOVPRFX with a /z predicate copies operand 4 into the output,
;;	  followed by <sve_fmla_op>;
;;   0  - the fallback is tied to the output; a MOVPRFX with a /m
;;	  predicate copies operand 4 into the output, followed by
;;	  <sve_fmla_op>;
;;   w  - the template is "#"; after reload the insn is split into an
;;	  UNSPEC_SEL of operands 4 and 5 into the output, followed by the
;;	  ternary operation with the output as its third input and as the
;;	  other UNSPEC_SEL value.
|
||||
(define_insn_and_split "*cond_<optab><mode>_any"
|
||||
[(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
|
||||
(unspec:SVE_F
|
||||
[(match_operand:SVE_F 2 "register_operand" "w, w, w")
|
||||
(match_operand:SVE_F 3 "register_operand" "w, w, w")
|
||||
(match_operand:SVE_F 4 "register_operand" "w, w, w")]
|
||||
SVE_COND_FP_TERNARY)
|
||||
(match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
|
||||
UNSPEC_SEL))]
|
||||
"TARGET_SVE
|
||||
&& !rtx_equal_p (operands[2], operands[5])
|
||||
&& !rtx_equal_p (operands[3], operands[5])
|
||||
&& !rtx_equal_p (operands[4], operands[5])"
|
||||
"@
|
||||
movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||
movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||
#"
|
||||
"&& reload_completed
|
||||
&& !CONSTANT_P (operands[5])
|
||||
&& !rtx_equal_p (operands[0], operands[5])"
|
||||
[(set (match_dup 0)
|
||||
(unspec:SVE_F [(match_dup 1) (match_dup 4) (match_dup 5)] UNSPEC_SEL))
|
||||
(set (match_dup 0)
|
||||
(unspec:SVE_F
|
||||
[(match_dup 1)
|
||||
(unspec:SVE_F [(match_dup 2) (match_dup 3) (match_dup 0)]
|
||||
SVE_COND_FP_TERNARY)
|
||||
(match_dup 0)]
|
||||
UNSPEC_SEL))]
|
||||
""
|
||||
[(set_attr "movprfx" "yes")]
|
||||
)
|
||||
|
||||
;; Shift an SVE vector left and insert a scalar into element 0.
|
||||
(define_insn "vec_shl_insert_<mode>"
|
||||
[(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
|
||||
|
|
|
@ -1320,14 +1320,18 @@ aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
|
|||
return default_get_mask_mode (nunits, nbytes);
|
||||
}
|
||||
|
||||
/* Implement TARGET_PREFERRED_ELSE_VALUE. Prefer to use the first
|
||||
arithmetic operand as the else value if the else value doesn't matter,
|
||||
since that exactly matches the SVE destructive merging form. */
|
||||
/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
|
||||
prefer to use the first arithmetic operand as the else value if
|
||||
the else value doesn't matter, since that exactly matches the SVE
|
||||
destructive merging form. For ternary operations we could either
|
||||
pick the first operand and use FMAD-like instructions or the last
|
||||
operand and use FMLA-like instructions; the latter seems more
|
||||
natural. */
|
||||
|
||||
static tree
|
||||
aarch64_preferred_else_value (unsigned, tree, unsigned int, tree *ops)
|
||||
aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
|
||||
{
|
||||
return ops[0];
|
||||
return nops == 3 ? ops[2] : ops[0];
|
||||
}
|
||||
|
||||
/* Implement TARGET_HARD_REGNO_NREGS. */
|
||||
|
|
|
@ -471,6 +471,10 @@
|
|||
UNSPEC_COND_DIV ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_MAX ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_MIN ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_FMLA ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_FMLS ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_FNMLA ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_FNMLS ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_LT ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_LE ; Used in aarch64-sve.md.
|
||||
UNSPEC_COND_EQ ; Used in aarch64-sve.md.
|
||||
|
@ -1567,6 +1571,11 @@
|
|||
UNSPEC_COND_MUL UNSPEC_COND_DIV
|
||||
UNSPEC_COND_MAX UNSPEC_COND_MIN])
|
||||
|
||||
(define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
|
||||
UNSPEC_COND_FMLS
|
||||
UNSPEC_COND_FNMLA
|
||||
UNSPEC_COND_FNMLS])
|
||||
|
||||
(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
|
||||
UNSPEC_COND_EQ UNSPEC_COND_NE
|
||||
UNSPEC_COND_GE UNSPEC_COND_GT])
|
||||
|
@ -1599,7 +1608,11 @@
|
|||
(UNSPEC_COND_MUL "mul")
|
||||
(UNSPEC_COND_DIV "div")
|
||||
(UNSPEC_COND_MAX "smax")
|
||||
(UNSPEC_COND_MIN "smin")])
|
||||
(UNSPEC_COND_MIN "smin")
|
||||
(UNSPEC_COND_FMLA "fma")
|
||||
(UNSPEC_COND_FMLS "fnma")
|
||||
(UNSPEC_COND_FNMLA "fnms")
|
||||
(UNSPEC_COND_FNMLS "fms")])
|
||||
|
||||
(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
|
||||
(UNSPEC_UMINV "umin")
|
||||
|
@ -1826,6 +1839,16 @@
|
|||
(UNSPEC_COND_MAX "fmaxnm")
|
||||
(UNSPEC_COND_MIN "fminnm")])
|
||||
|
||||
(define_int_attr sve_fmla_op [(UNSPEC_COND_FMLA "fmla")
|
||||
(UNSPEC_COND_FMLS "fmls")
|
||||
(UNSPEC_COND_FNMLA "fnmla")
|
||||
(UNSPEC_COND_FNMLS "fnmls")])
|
||||
|
||||
(define_int_attr sve_fmad_op [(UNSPEC_COND_FMLA "fmad")
|
||||
(UNSPEC_COND_FMLS "fmsb")
|
||||
(UNSPEC_COND_FNMLA "fnmad")
|
||||
(UNSPEC_COND_FNMLS "fnmsb")])
|
||||
|
||||
(define_int_attr commutative [(UNSPEC_COND_ADD "true")
|
||||
(UNSPEC_COND_SUB "false")
|
||||
(UNSPEC_COND_MUL "true")
|
||||
|
|
|
@ -6438,6 +6438,23 @@ Operands 0, 2, 3 and 4 all have mode @var{m}. Operand 1 is a scalar
|
|||
integer if @var{m} is scalar, otherwise it has the mode returned by
|
||||
@code{TARGET_VECTORIZE_GET_MASK_MODE}.
|
||||
|
||||
@cindex @code{cond_fma@var{mode}} instruction pattern
|
||||
@cindex @code{cond_fms@var{mode}} instruction pattern
|
||||
@cindex @code{cond_fnma@var{mode}} instruction pattern
|
||||
@cindex @code{cond_fnms@var{mode}} instruction pattern
|
||||
@item @samp{cond_fma@var{mode}}
|
||||
@itemx @samp{cond_fms@var{mode}}
|
||||
@itemx @samp{cond_fnma@var{mode}}
|
||||
@itemx @samp{cond_fnms@var{mode}}
|
||||
Like @samp{cond_add@var{m}}, except that the conditional operation
|
||||
takes 3 operands rather than two. For example, the vector form of
|
||||
@samp{cond_fma@var{mode}} is equivalent to:
|
||||
|
||||
@smallexample
|
||||
for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
|
||||
op0[i] = op1[i] ? fma (op2[i], op3[i], op4[i]) : op5[i];
|
||||
@end smallexample
|
||||
|
||||
@cindex @code{neg@var{mode}cc} instruction pattern
|
||||
@item @samp{neg@var{mode}cc}
|
||||
Similar to @samp{mov@var{mode}cc} but for conditional negation. Conditionally
|
||||
|
|
|
@ -3750,7 +3750,7 @@ decision_tree::gen (FILE *f, bool gimple)
|
|||
}
|
||||
fprintf (stderr, "removed %u duplicate tails\n", rcnt);
|
||||
|
||||
for (unsigned n = 1; n <= 4; ++n)
|
||||
for (unsigned n = 1; n <= 5; ++n)
|
||||
{
|
||||
/* First generate split-out functions. */
|
||||
for (unsigned i = 0; i < root->kids.length (); i++)
|
||||
|
|
|
@ -54,6 +54,8 @@ static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
|
|||
code_helper, tree, tree, tree, tree);
|
||||
static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
|
||||
code_helper, tree, tree, tree, tree, tree);
|
||||
static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
|
||||
code_helper, tree, tree, tree, tree, tree, tree);
|
||||
|
||||
const unsigned int gimple_match_op::MAX_NUM_OPS;
|
||||
|
||||
|
@ -80,7 +82,12 @@ convert_conditional_op (gimple_match_op *orig_op,
|
|||
if (orig_op->code.is_tree_code ())
|
||||
ifn = get_conditional_internal_fn ((tree_code) orig_op->code);
|
||||
else
|
||||
return false;
|
||||
{
|
||||
combined_fn cfn = orig_op->code;
|
||||
if (!internal_fn_p (cfn))
|
||||
return false;
|
||||
ifn = get_conditional_internal_fn (as_internal_fn (cfn));
|
||||
}
|
||||
if (ifn == IFN_LAST)
|
||||
return false;
|
||||
unsigned int num_ops = orig_op->num_ops;
|
||||
|
@ -403,6 +410,34 @@ gimple_resimplify4 (gimple_seq *seq, gimple_match_op *res_op,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Helper that matches and simplifies the toplevel result from
|
||||
a gimple_simplify run (where we don't want to build
|
||||
a stmt in case it's used in in-place folding). Replaces
|
||||
RES_OP with a simplified and/or canonicalized result and
|
||||
returns whether any change was made. */
|
||||
|
||||
bool
|
||||
gimple_resimplify5 (gimple_seq *seq, gimple_match_op *res_op,
|
||||
tree (*valueize)(tree))
|
||||
{
|
||||
/* No constant folding is defined for five-operand functions. */
|
||||
|
||||
gimple_match_op res_op2 (*res_op);
|
||||
if (gimple_simplify (&res_op2, seq, valueize,
|
||||
res_op->code, res_op->type,
|
||||
res_op->ops[0], res_op->ops[1], res_op->ops[2],
|
||||
res_op->ops[3], res_op->ops[4]))
|
||||
{
|
||||
*res_op = res_op2;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (maybe_resimplify_conditional_op (seq, res_op, valueize))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If in GIMPLE the operation described by RES_OP should be single-rhs,
|
||||
build a GENERIC tree for that expression and update RES_OP accordingly. */
|
||||
|
||||
|
@ -444,7 +479,8 @@ build_call_internal (internal_fn fn, gimple_match_op *res_op)
|
|||
res_op->op_or_null (0),
|
||||
res_op->op_or_null (1),
|
||||
res_op->op_or_null (2),
|
||||
res_op->op_or_null (3));
|
||||
res_op->op_or_null (3),
|
||||
res_op->op_or_null (4));
|
||||
}
|
||||
|
||||
/* Push the exploded expression described by RES_OP as a statement to
|
||||
|
@ -538,7 +574,8 @@ maybe_push_res_to_seq (gimple_match_op *res_op, gimple_seq *seq, tree res)
|
|||
res_op->op_or_null (0),
|
||||
res_op->op_or_null (1),
|
||||
res_op->op_or_null (2),
|
||||
res_op->op_or_null (3));
|
||||
res_op->op_or_null (3),
|
||||
res_op->op_or_null (4));
|
||||
}
|
||||
if (!res)
|
||||
{
|
||||
|
@ -745,14 +782,22 @@ static bool
|
|||
try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
|
||||
gimple_seq *seq, tree (*valueize) (tree))
|
||||
{
|
||||
code_helper op;
|
||||
tree_code code = conditional_internal_fn_code (ifn);
|
||||
if (code == ERROR_MARK)
|
||||
return false;
|
||||
if (code != ERROR_MARK)
|
||||
op = code;
|
||||
else
|
||||
{
|
||||
ifn = get_unconditional_internal_fn (ifn);
|
||||
if (ifn == IFN_LAST)
|
||||
return false;
|
||||
op = as_combined_fn (ifn);
|
||||
}
|
||||
|
||||
unsigned int num_ops = res_op->num_ops;
|
||||
gimple_match_op cond_op (gimple_match_cond (res_op->ops[0],
|
||||
res_op->ops[num_ops - 1]),
|
||||
code, res_op->type, num_ops - 2);
|
||||
op, res_op->type, num_ops - 2);
|
||||
for (unsigned int i = 1; i < num_ops - 1; ++i)
|
||||
cond_op.ops[i - 1] = res_op->ops[i];
|
||||
switch (num_ops - 2)
|
||||
|
@ -761,6 +806,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
|
|||
if (!gimple_resimplify2 (seq, &cond_op, valueize))
|
||||
return false;
|
||||
break;
|
||||
case 3:
|
||||
if (!gimple_resimplify3 (seq, &cond_op, valueize))
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
@ -893,7 +942,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|||
/* ??? This way we can't simplify calls with side-effects. */
|
||||
if (gimple_call_lhs (stmt) != NULL_TREE
|
||||
&& gimple_call_num_args (stmt) >= 1
|
||||
&& gimple_call_num_args (stmt) <= 4)
|
||||
&& gimple_call_num_args (stmt) <= 5)
|
||||
{
|
||||
bool valueized = false;
|
||||
combined_fn cfn;
|
||||
|
@ -943,6 +992,9 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|||
case 4:
|
||||
return (gimple_resimplify4 (seq, res_op, valueize)
|
||||
|| valueized);
|
||||
case 5:
|
||||
return (gimple_resimplify5 (seq, res_op, valueize)
|
||||
|| valueized);
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
|
|
@ -91,18 +91,21 @@ struct gimple_match_op
|
|||
code_helper, tree, tree, tree, tree);
|
||||
gimple_match_op (const gimple_match_cond &,
|
||||
code_helper, tree, tree, tree, tree, tree);
|
||||
gimple_match_op (const gimple_match_cond &,
|
||||
code_helper, tree, tree, tree, tree, tree, tree);
|
||||
|
||||
void set_op (code_helper, tree, unsigned int);
|
||||
void set_op (code_helper, tree, tree);
|
||||
void set_op (code_helper, tree, tree, tree);
|
||||
void set_op (code_helper, tree, tree, tree, tree);
|
||||
void set_op (code_helper, tree, tree, tree, tree, tree);
|
||||
void set_op (code_helper, tree, tree, tree, tree, tree, tree);
|
||||
void set_value (tree);
|
||||
|
||||
tree op_or_null (unsigned int) const;
|
||||
|
||||
/* The maximum value of NUM_OPS. */
|
||||
static const unsigned int MAX_NUM_OPS = 4;
|
||||
static const unsigned int MAX_NUM_OPS = 5;
|
||||
|
||||
/* The conditions under which the operation is performed, and the value to
|
||||
use as a fallback. */
|
||||
|
@ -182,6 +185,20 @@ gimple_match_op::gimple_match_op (const gimple_match_cond &cond_in,
|
|||
ops[3] = op3;
|
||||
}
|
||||
|
||||
inline
|
||||
gimple_match_op::gimple_match_op (const gimple_match_cond &cond_in,
|
||||
code_helper code_in, tree type_in,
|
||||
tree op0, tree op1, tree op2, tree op3,
|
||||
tree op4)
|
||||
: cond (cond_in), code (code_in), type (type_in), num_ops (5)
|
||||
{
|
||||
ops[0] = op0;
|
||||
ops[1] = op1;
|
||||
ops[2] = op2;
|
||||
ops[3] = op3;
|
||||
ops[4] = op4;
|
||||
}
|
||||
|
||||
/* Change the operation performed to CODE_IN, the type of the result to
|
||||
TYPE_IN, and the number of operands to NUM_OPS_IN. The caller needs
|
||||
to set the operands itself. */
|
||||
|
@ -242,6 +259,20 @@ gimple_match_op::set_op (code_helper code_in, tree type_in,
|
|||
ops[3] = op3;
|
||||
}
|
||||
|
||||
inline void
|
||||
gimple_match_op::set_op (code_helper code_in, tree type_in,
|
||||
tree op0, tree op1, tree op2, tree op3, tree op4)
|
||||
{
|
||||
code = code_in;
|
||||
type = type_in;
|
||||
num_ops = 5;
|
||||
ops[0] = op0;
|
||||
ops[1] = op1;
|
||||
ops[2] = op2;
|
||||
ops[3] = op3;
|
||||
ops[4] = op4;
|
||||
}
|
||||
|
||||
/* Set the "operation" to be the single value VALUE, such as a constant
|
||||
or SSA_NAME. */
|
||||
|
||||
|
@ -279,6 +310,7 @@ bool gimple_resimplify1 (gimple_seq *, gimple_match_op *, tree (*)(tree));
|
|||
bool gimple_resimplify2 (gimple_seq *, gimple_match_op *, tree (*)(tree));
|
||||
bool gimple_resimplify3 (gimple_seq *, gimple_match_op *, tree (*)(tree));
|
||||
bool gimple_resimplify4 (gimple_seq *, gimple_match_op *, tree (*)(tree));
|
||||
bool gimple_resimplify5 (gimple_seq *, gimple_match_op *, tree (*)(tree));
|
||||
tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
|
||||
tree res = NULL_TREE);
|
||||
void maybe_build_generic_op (gimple_match_op *);
|
||||
|
|
|
@ -113,6 +113,7 @@ init_internal_fns ()
|
|||
#define ternary_direct { 0, 0, true }
|
||||
#define cond_unary_direct { 1, 1, true }
|
||||
#define cond_binary_direct { 1, 1, true }
|
||||
#define cond_ternary_direct { 1, 1, true }
|
||||
#define while_direct { 0, 2, false }
|
||||
#define fold_extract_direct { 2, 2, false }
|
||||
#define fold_left_direct { 1, 1, false }
|
||||
|
@ -2993,6 +2994,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
|
|||
#define expand_cond_binary_optab_fn(FN, STMT, OPTAB) \
|
||||
expand_direct_optab_fn (FN, STMT, OPTAB, 4)
|
||||
|
||||
#define expand_cond_ternary_optab_fn(FN, STMT, OPTAB) \
|
||||
expand_direct_optab_fn (FN, STMT, OPTAB, 5)
|
||||
|
||||
#define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
|
||||
expand_direct_optab_fn (FN, STMT, OPTAB, 3)
|
||||
|
||||
|
@ -3075,6 +3079,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
|
|||
#define direct_ternary_optab_supported_p direct_optab_supported_p
|
||||
#define direct_cond_unary_optab_supported_p direct_optab_supported_p
|
||||
#define direct_cond_binary_optab_supported_p direct_optab_supported_p
|
||||
#define direct_cond_ternary_optab_supported_p direct_optab_supported_p
|
||||
#define direct_mask_load_optab_supported_p direct_optab_supported_p
|
||||
#define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
|
||||
#define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
|
||||
|
@ -3277,6 +3282,57 @@ conditional_internal_fn_code (internal_fn ifn)
|
|||
}
|
||||
}
|
||||
|
||||
/* Invoke T(IFN) for each internal function IFN that also has an
|
||||
IFN_COND_* form. */
|
||||
#define FOR_EACH_COND_FN_PAIR(T) \
|
||||
T (FMA) \
|
||||
T (FMS) \
|
||||
T (FNMA) \
|
||||
T (FNMS)
|
||||
|
||||
/* Return a function that only performs internal function FN when a
|
||||
certain condition is met and that uses a given fallback value otherwise.
|
||||
In other words, the returned function FN' is such that:
|
||||
|
||||
LHS = FN' (COND, A1, ... An, ELSE)
|
||||
|
||||
is equivalent to the C expression:
|
||||
|
||||
LHS = COND ? FN (A1, ..., An) : ELSE;
|
||||
|
||||
operating elementwise if the operands are vectors.
|
||||
|
||||
Return IFN_LAST if no such function exists. */
|
||||
|
||||
internal_fn
|
||||
get_conditional_internal_fn (internal_fn fn)
|
||||
{
|
||||
switch (fn)
|
||||
{
|
||||
#define CASE(NAME) case IFN_##NAME: return IFN_COND_##NAME;
|
||||
FOR_EACH_COND_FN_PAIR(CASE)
|
||||
#undef CASE
|
||||
default:
|
||||
return IFN_LAST;
|
||||
}
|
||||
}
|
||||
|
||||
/* If IFN implements the conditional form of an unconditional internal
|
||||
function, return that unconditional function, otherwise return IFN_LAST. */
|
||||
|
||||
internal_fn
|
||||
get_unconditional_internal_fn (internal_fn ifn)
|
||||
{
|
||||
switch (ifn)
|
||||
{
|
||||
#define CASE(NAME) case IFN_COND_##NAME: return IFN_##NAME;
|
||||
FOR_EACH_COND_FN_PAIR(CASE)
|
||||
#undef CASE
|
||||
default:
|
||||
return IFN_LAST;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if IFN is some form of load from memory. */
|
||||
|
||||
bool
|
||||
|
|
|
@ -59,7 +59,8 @@ along with GCC; see the file COPYING3. If not see
|
|||
- binary: a normal binary optab, such as vec_interleave_lo_<mode>
|
||||
- ternary: a normal ternary optab, such as fma<mode>4
|
||||
|
||||
- cond_binary: a conditional binary optab, such as add<mode>cc
|
||||
- cond_binary: a conditional binary optab, such as cond_add<mode>
|
||||
- cond_ternary: a conditional ternary optab, such as cond_fma<mode>
|
||||
|
||||
- fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
|
||||
|
||||
|
@ -167,6 +168,11 @@ DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CONST | ECF_NOTHROW,
|
|||
DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW,
|
||||
cond_xor, cond_binary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (COND_FMA, ECF_CONST, cond_fma, cond_ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
|
||||
|
@ -235,7 +241,7 @@ DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary)
|
|||
DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
|
||||
|
||||
/* Ternary math functions. */
|
||||
DEF_INTERNAL_FLT_FN (FMA, ECF_CONST, fma, ternary)
|
||||
DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
|
||||
|
||||
/* Unary integer ops. */
|
||||
DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
|
||||
|
|
|
@ -193,7 +193,9 @@ direct_internal_fn_supported_p (internal_fn fn, tree type0, tree type1,
|
|||
extern bool set_edom_supported_p (void);
|
||||
|
||||
extern internal_fn get_conditional_internal_fn (tree_code);
|
||||
extern internal_fn get_conditional_internal_fn (internal_fn);
|
||||
extern tree_code conditional_internal_fn_code (internal_fn);
|
||||
extern internal_fn get_unconditional_internal_fn (internal_fn);
|
||||
|
||||
extern bool internal_load_fn_p (internal_fn);
|
||||
extern bool internal_store_fn_p (internal_fn);
|
||||
|
|
29
gcc/match.pd
29
gcc/match.pd
|
@ -86,6 +86,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
IFN_COND_MUL IFN_COND_DIV IFN_COND_MOD IFN_COND_RDIV
|
||||
IFN_COND_MIN IFN_COND_MAX
|
||||
IFN_COND_AND IFN_COND_IOR IFN_COND_XOR)
|
||||
|
||||
/* Same for ternary operations. */
|
||||
(define_operator_list UNCOND_TERNARY
|
||||
IFN_FMA IFN_FMS IFN_FNMA IFN_FNMS)
|
||||
(define_operator_list COND_TERNARY
|
||||
IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
|
||||
|
||||
/* As opposed to convert?, this still creates a single pattern, so
|
||||
it is not a suitable replacement for convert? in all cases. */
|
||||
|
@ -4885,6 +4891,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
(if (element_precision (type) == element_precision (op_type))
|
||||
(view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
|
||||
|
||||
/* Same for ternary operations. */
|
||||
(for uncond_op (UNCOND_TERNARY)
|
||||
cond_op (COND_TERNARY)
|
||||
(simplify
|
||||
(vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
|
||||
(with { tree op_type = TREE_TYPE (@5); }
|
||||
(if (element_precision (type) == element_precision (op_type))
|
||||
(view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
|
||||
(simplify
|
||||
(vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
|
||||
(with { tree op_type = TREE_TYPE (@5); }
|
||||
(if (element_precision (type) == element_precision (op_type))
|
||||
(view_convert (cond_op (bit_not @0) @2 @3 @4
|
||||
(view_convert:op_type @1)))))))
|
||||
|
||||
/* Detect cases in which a VEC_COND_EXPR effectively replaces the
|
||||
"else" value of an IFN_COND_*. */
|
||||
(for cond_op (COND_BINARY)
|
||||
|
@ -4893,3 +4914,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
(with { tree op_type = TREE_TYPE (@3); }
|
||||
(if (element_precision (type) == element_precision (op_type))
|
||||
(view_convert (cond_op @0 @1 @2 (view_convert:op_type @4)))))))
|
||||
|
||||
/* Same for ternary operations. */
|
||||
(for cond_op (COND_TERNARY)
|
||||
(simplify
|
||||
(vec_cond @0 (view_convert? (cond_op @0 @1 @2 @3 @4)) @5)
|
||||
(with { tree op_type = TREE_TYPE (@4); }
|
||||
(if (element_precision (type) == element_precision (op_type))
|
||||
(view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @5)))))))
|
||||
|
|
|
@ -234,6 +234,10 @@ OPTAB_D (cond_smin_optab, "cond_smin$a")
|
|||
OPTAB_D (cond_smax_optab, "cond_smax$a")
|
||||
OPTAB_D (cond_umin_optab, "cond_umin$a")
|
||||
OPTAB_D (cond_umax_optab, "cond_umax$a")
|
||||
OPTAB_D (cond_fma_optab, "cond_fma$a")
|
||||
OPTAB_D (cond_fms_optab, "cond_fms$a")
|
||||
OPTAB_D (cond_fnma_optab, "cond_fnma$a")
|
||||
OPTAB_D (cond_fnms_optab, "cond_fnms$a")
|
||||
OPTAB_D (cmov_optab, "cmov$a6")
|
||||
OPTAB_D (cstore_optab, "cstore$a4")
|
||||
OPTAB_D (ctrap_optab, "ctrap$a4")
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
2018-07-12 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* gcc.dg/vect/vect-cond-arith-3.c: New test.
|
||||
* gcc.target/aarch64/sve/vcond_13.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_13_run.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_14.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_14_run.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_15.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_15_run.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_16.c: Likewise.
|
||||
* gcc.target/aarch64/sve/vcond_16_run.c: Likewise.
|
||||
|
||||
2018-07-12 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* gcc.dg/vect/vect-cond-arith-2.c: New test.
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/* { dg-require-effective-target scalar_all_fma } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N (VECTOR_BITS * 11 / 64 + 3)
|
||||
|
||||
#define DEF(INV) \
|
||||
void __attribute__ ((noipa)) \
|
||||
f_##INV (double *restrict a, double *restrict b, \
|
||||
double *restrict c, double *restrict d) \
|
||||
{ \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
double mb = (INV & 1 ? -b[i] : b[i]); \
|
||||
double mc = c[i]; \
|
||||
double md = (INV & 2 ? -d[i] : d[i]); \
|
||||
double fma = __builtin_fma (mb, mc, md); \
|
||||
double truev = (INV & 4 ? -fma : fma); \
|
||||
a[i] = b[i] < 10 ? truev : 10.0; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Expand to a block statement that calls f_<INV> on the arrays A, B, C and
   D that are in scope and then recomputes every element, aborting on the
   first mismatch.  The expected condition is written as "i % 17 < 10",
   which matches "b[i] < 10" in f_<INV> because main initializes B[i] to
   i % 17.  An empty asm with a memory clobber ends each iteration
   (presumably to stop the checking loop being optimized together with the
   code under test).  */
#define TEST(INV) \
  { \
    f_##INV (a, b, c, d); \
    for (int i = 0; i < N; ++i) \
      { \
        double mb = (INV & 1 ? -b[i] : b[i]); \
        double mc = c[i]; \
        double md = (INV & 2 ? -d[i] : d[i]); \
        double fma = __builtin_fma (mb, mc, md); \
        double truev = (INV & 4 ? -fma : fma); \
        if (a[i] != (i % 17 < 10 ? truev : 10.0)) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }
|
||||
|
||||
/* Invoke T once for each INV value from 0 to 7, i.e. for every combination
   of the three negation bits.  */
#define FOR_EACH_INV(T) \
  T (0) T (1) T (2) T (3) T (4) T (5) T (6) T (7)
|
||||
|
||||
/* Instantiate f_0 ... f_7, one function per negation combination.  */
FOR_EACH_INV (DEF)
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
double a[N], b[N], c[N], d[N];
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
b[i] = i % 17;
|
||||
c[i] = i % 9 + 11;
|
||||
d[i] = i % 13 + 14;
|
||||
asm volatile ("" ::: "memory");
|
||||
}
|
||||
FOR_EACH_INV (TEST)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 2 "optimized" { target vect_double_cond_arith } } } */
|
||||
/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 2 "optimized" { target vect_double_cond_arith } } } */
|
||||
/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 2 "optimized" { target vect_double_cond_arith } } } */
|
||||
/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 2 "optimized" { target vect_double_cond_arith } } } */
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
/* Number of elements processed by each loop that DEF_LOOP defines.  */
#define N 119
|
||||
|
||||
/* Define f_<INV>_<SUFFIX>, which for each of the N elements stores into
   A[i] either a fused multiply-add of B[i], C[i] and D[i] (when
   COND[i] < 10) or B[i] itself (otherwise), so the fallback value is the
   first multiplicand.  TYPE is the element type of A, B, C and D, CMPTYPE
   is the element type of COND, and SUFFIX is pasted onto __builtin_fma to
   name the built-in that is called.  Bit 0 of INV negates the B[i]
   multiplicand, bit 1 negates the D[i] addend and bit 2 negates the fma
   result.  N must be defined at the point where the macro is expanded.  */
#define DEF_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  void __attribute__ ((noipa)) \
  f_##INV##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, \
                      TYPE *restrict c, TYPE *restrict d, \
                      CMPTYPE *restrict cond) \
  { \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE mb = (INV & 1 ? -b[i] : b[i]); \
        TYPE mc = c[i]; \
        TYPE md = (INV & 2 ? -d[i] : d[i]); \
        TYPE fma = __builtin_fma##SUFFIX (mb, mc, md); \
        TYPE truev = (INV & 4 ? -fma : fma); \
        a[i] = cond[i] < 10 ? truev : b[i]; \
      } \
  }
|
||||
|
||||
/* Invoke T (INV, TYPE, CMPTYPE, SUFFIX) for the three tested element types:
   _Float16 compared via short, float via float and double via double.  */
#define FOR_EACH_TYPE(T, INV) \
  T (INV, _Float16, short, f16) \
  T (INV, float, float, f32) \
  T (INV, double, double, f64)
|
||||
|
||||
/* Invoke FOR_EACH_TYPE (T, INV) for each INV value from 0 to 7, i.e. for
   every combination of the three negation bits.  */
#define FOR_EACH_INV(T) \
  FOR_EACH_TYPE (T, 0) \
  FOR_EACH_TYPE (T, 1) \
  FOR_EACH_TYPE (T, 2) \
  FOR_EACH_TYPE (T, 3) \
  FOR_EACH_TYPE (T, 4) \
  FOR_EACH_TYPE (T, 5) \
  FOR_EACH_TYPE (T, 6) \
  FOR_EACH_TYPE (T, 7)
|
||||
|
||||
/* Instantiate f_<INV>_<SUFFIX> for every INV value and element type.  */
FOR_EACH_INV (DEF_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.d,} 2 } } */
|
|
@ -0,0 +1,37 @@
|
|||
/* { dg-do run { target aarch64_sve_hw } } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include "vcond_13.c"
|
||||
|
||||
/* Expand to a block statement that fills B, C, D and COND with small
   repeating integer patterns, calls f_<INV>_<SUFFIX> and then recomputes
   every element, aborting on the first mismatch.  The expected condition
   "i % 17 < 10" mirrors "cond[i] < 10" because COND[i] is set to i % 17,
   and the expected fallback value is B[i].  The reference value is always
   computed in double with __builtin_fma, whatever TYPE is; the inputs are
   small integers (|result| <= 14 * 19 + 26), so this should be exact for
   every TYPE.  */
#define TEST_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  { \
    TYPE a[N], b[N], c[N], d[N]; \
    CMPTYPE cond[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        b[i] = i % 15; \
        c[i] = i % 9 + 11; \
        d[i] = i % 13 + 14; \
        cond[i] = i % 17; \
        asm volatile ("" ::: "memory"); \
      } \
    f_##INV##_##SUFFIX (a, b, c, d, cond); \
    for (int i = 0; i < N; ++i) \
      { \
        double mb = (INV & 1 ? -b[i] : b[i]); \
        double mc = c[i]; \
        double md = (INV & 2 ? -d[i] : d[i]); \
        double fma = __builtin_fma (mb, mc, md); \
        double truev = (INV & 4 ? -fma : fma); \
        if (a[i] != (i % 17 < 10 ? truev : b[i])) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_INV (TEST_LOOP)
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
/* Number of elements processed by each loop that DEF_LOOP defines.  */
#define N 119
|
||||
|
||||
/* Define f_<INV>_<SUFFIX>, which for each of the N elements stores into
   A[i] either a fused multiply-add of B[i], C[i] and D[i] (when
   COND[i] < 10) or C[i] itself (otherwise), so the fallback value is the
   second multiplicand.  TYPE is the element type of A, B, C and D, CMPTYPE
   is the element type of COND, and SUFFIX is pasted onto __builtin_fma to
   name the built-in that is called.  Bit 0 of INV negates the B[i]
   multiplicand, bit 1 negates the D[i] addend and bit 2 negates the fma
   result.  N must be defined at the point where the macro is expanded.  */
#define DEF_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  void __attribute__ ((noipa)) \
  f_##INV##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, \
                      TYPE *restrict c, TYPE *restrict d, \
                      CMPTYPE *restrict cond) \
  { \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE mb = (INV & 1 ? -b[i] : b[i]); \
        TYPE mc = c[i]; \
        TYPE md = (INV & 2 ? -d[i] : d[i]); \
        TYPE fma = __builtin_fma##SUFFIX (mb, mc, md); \
        TYPE truev = (INV & 4 ? -fma : fma); \
        a[i] = cond[i] < 10 ? truev : c[i]; \
      } \
  }
|
||||
|
||||
/* Invoke T (INV, TYPE, CMPTYPE, SUFFIX) for the three tested element types:
   _Float16 compared via short, float via float and double via double.  */
#define FOR_EACH_TYPE(T, INV) \
  T (INV, _Float16, short, f16) \
  T (INV, float, float, f32) \
  T (INV, double, double, f64)
|
||||
|
||||
/* Invoke FOR_EACH_TYPE (T, INV) for each INV value from 0 to 7, i.e. for
   every combination of the three negation bits.  */
#define FOR_EACH_INV(T) \
  FOR_EACH_TYPE (T, 0) \
  FOR_EACH_TYPE (T, 1) \
  FOR_EACH_TYPE (T, 2) \
  FOR_EACH_TYPE (T, 3) \
  FOR_EACH_TYPE (T, 4) \
  FOR_EACH_TYPE (T, 5) \
  FOR_EACH_TYPE (T, 6) \
  FOR_EACH_TYPE (T, 7)
|
||||
|
||||
/* Instantiate f_<INV>_<SUFFIX> for every INV value and element type.  */
FOR_EACH_INV (DEF_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.d,} 2 } } */
|
|
@ -0,0 +1,37 @@
|
|||
/* { dg-do run { target aarch64_sve_hw } } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include "vcond_14.c"
|
||||
|
||||
/* Expand to a block statement that fills B, C, D and COND with small
   repeating integer patterns, calls f_<INV>_<SUFFIX> and then recomputes
   every element, aborting on the first mismatch.  The expected condition
   "i % 17 < 10" mirrors "cond[i] < 10" because COND[i] is set to i % 17,
   and the expected fallback value is C[i].  The reference value is always
   computed in double with __builtin_fma, whatever TYPE is; the inputs are
   small integers (|result| <= 14 * 19 + 26), so this should be exact for
   every TYPE.  */
#define TEST_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  { \
    TYPE a[N], b[N], c[N], d[N]; \
    CMPTYPE cond[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        b[i] = i % 15; \
        c[i] = i % 9 + 11; \
        d[i] = i % 13 + 14; \
        cond[i] = i % 17; \
        asm volatile ("" ::: "memory"); \
      } \
    f_##INV##_##SUFFIX (a, b, c, d, cond); \
    for (int i = 0; i < N; ++i) \
      { \
        double mb = (INV & 1 ? -b[i] : b[i]); \
        double mc = c[i]; \
        double md = (INV & 2 ? -d[i] : d[i]); \
        double fma = __builtin_fma (mb, mc, md); \
        double truev = (INV & 4 ? -fma : fma); \
        if (a[i] != (i % 17 < 10 ? truev : c[i])) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_INV (TEST_LOOP)
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
/* Number of elements processed by each loop that DEF_LOOP defines.  */
#define N 119
|
||||
|
||||
/* Define f_<INV>_<SUFFIX>, which for each of the N elements stores into
   A[i] either a fused multiply-add of B[i], C[i] and D[i] (when
   COND[i] < 10) or D[i] itself (otherwise), so the fallback value is the
   un-negated addend.  TYPE is the element type of A, B, C and D, CMPTYPE
   is the element type of COND, and SUFFIX is pasted onto __builtin_fma to
   name the built-in that is called.  Bit 0 of INV negates the B[i]
   multiplicand, bit 1 negates the D[i] addend and bit 2 negates the fma
   result.  N must be defined at the point where the macro is expanded.  */
#define DEF_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  void __attribute__ ((noipa)) \
  f_##INV##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, \
                      TYPE *restrict c, TYPE *restrict d, \
                      CMPTYPE *restrict cond) \
  { \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE mb = (INV & 1 ? -b[i] : b[i]); \
        TYPE mc = c[i]; \
        TYPE md = (INV & 2 ? -d[i] : d[i]); \
        TYPE fma = __builtin_fma##SUFFIX (mb, mc, md); \
        TYPE truev = (INV & 4 ? -fma : fma); \
        a[i] = cond[i] < 10 ? truev : d[i]; \
      } \
  }
|
||||
|
||||
/* Invoke T (INV, TYPE, CMPTYPE, SUFFIX) for the three tested element types:
   _Float16 compared via short, float via float and double via double.  */
#define FOR_EACH_TYPE(T, INV) \
  T (INV, _Float16, short, f16) \
  T (INV, float, float, f32) \
  T (INV, double, double, f64)
|
||||
|
||||
/* Invoke FOR_EACH_TYPE (T, INV) for each INV value from 0 to 7, i.e. for
   every combination of the three negation bits.  */
#define FOR_EACH_INV(T) \
  FOR_EACH_TYPE (T, 0) \
  FOR_EACH_TYPE (T, 1) \
  FOR_EACH_TYPE (T, 2) \
  FOR_EACH_TYPE (T, 3) \
  FOR_EACH_TYPE (T, 4) \
  FOR_EACH_TYPE (T, 5) \
  FOR_EACH_TYPE (T, 6) \
  FOR_EACH_TYPE (T, 7)
|
||||
|
||||
/* Instantiate f_<INV>_<SUFFIX> for every INV value and element type.  */
FOR_EACH_INV (DEF_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.d,} 2 } } */
|
|
@ -0,0 +1,37 @@
|
|||
/* { dg-do run { target aarch64_sve_hw } } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include "vcond_15.c"
|
||||
|
||||
/* Expand to a block statement that fills B, C, D and COND with small
   repeating integer patterns, calls f_<INV>_<SUFFIX> and then recomputes
   every element, aborting on the first mismatch.  The expected condition
   "i % 17 < 10" mirrors "cond[i] < 10" because COND[i] is set to i % 17,
   and the expected fallback value is D[i].  The reference value is always
   computed in double with __builtin_fma, whatever TYPE is; the inputs are
   small integers (|result| <= 14 * 19 + 26), so this should be exact for
   every TYPE.  */
#define TEST_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  { \
    TYPE a[N], b[N], c[N], d[N]; \
    CMPTYPE cond[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        b[i] = i % 15; \
        c[i] = i % 9 + 11; \
        d[i] = i % 13 + 14; \
        cond[i] = i % 17; \
        asm volatile ("" ::: "memory"); \
      } \
    f_##INV##_##SUFFIX (a, b, c, d, cond); \
    for (int i = 0; i < N; ++i) \
      { \
        double mb = (INV & 1 ? -b[i] : b[i]); \
        double mc = c[i]; \
        double md = (INV & 2 ? -d[i] : d[i]); \
        double fma = __builtin_fma (mb, mc, md); \
        double truev = (INV & 4 ? -fma : fma); \
        if (a[i] != (i % 17 < 10 ? truev : d[i])) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_INV (TEST_LOOP)
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
/* Number of elements processed by each loop that DEF_LOOP defines.  */
#define N 119
|
||||
|
||||
/* Define f_<INV>_<SUFFIX>, which for each of the N elements stores into
   A[i] either a fused multiply-add of B[i], C[i] and D[i] (when
   COND[i] < 10) or the constant 10 (otherwise), so the fallback value is
   none of the fma operands.  TYPE is the element type of A, B, C and D,
   CMPTYPE is the element type of COND, and SUFFIX is pasted onto
   __builtin_fma to name the built-in that is called.  Bit 0 of INV negates
   the B[i] multiplicand, bit 1 negates the D[i] addend and bit 2 negates
   the fma result.  N must be defined at the point where the macro is
   expanded.  */
#define DEF_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  void __attribute__ ((noipa)) \
  f_##INV##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, \
                      TYPE *restrict c, TYPE *restrict d, \
                      CMPTYPE *restrict cond) \
  { \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE mb = (INV & 1 ? -b[i] : b[i]); \
        TYPE mc = c[i]; \
        TYPE md = (INV & 2 ? -d[i] : d[i]); \
        TYPE fma = __builtin_fma##SUFFIX (mb, mc, md); \
        TYPE truev = (INV & 4 ? -fma : fma); \
        a[i] = cond[i] < 10 ? truev : 10; \
      } \
  }
|
||||
|
||||
/* Invoke T (INV, TYPE, CMPTYPE, SUFFIX) for the three tested element types:
   _Float16 compared via short, float via float and double via double.  */
#define FOR_EACH_TYPE(T, INV) \
  T (INV, _Float16, short, f16) \
  T (INV, float, float, f32) \
  T (INV, double, double, f64)
|
||||
|
||||
/* Invoke FOR_EACH_TYPE (T, INV) for each INV value from 0 to 7, i.e. for
   every combination of the three negation bits.  */
#define FOR_EACH_INV(T) \
  FOR_EACH_TYPE (T, 0) \
  FOR_EACH_TYPE (T, 1) \
  FOR_EACH_TYPE (T, 2) \
  FOR_EACH_TYPE (T, 3) \
  FOR_EACH_TYPE (T, 4) \
  FOR_EACH_TYPE (T, 5) \
  FOR_EACH_TYPE (T, 6) \
  FOR_EACH_TYPE (T, 7)
|
||||
|
||||
/* Instantiate f_<INV>_<SUFFIX> for every INV value and element type.  */
FOR_EACH_INV (DEF_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsel\t} 24 } } */
|
||||
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.d,} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.d,} 2 } } */
|
|
@ -0,0 +1,37 @@
|
|||
/* { dg-do run { target aarch64_sve_hw } } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include "vcond_16.c"
|
||||
|
||||
/* Expand to a block statement that fills B, C, D and COND with small
   repeating integer patterns, calls f_<INV>_<SUFFIX> and then recomputes
   every element, aborting on the first mismatch.  The expected condition
   "i % 17 < 10" mirrors "cond[i] < 10" because COND[i] is set to i % 17,
   and the expected fallback value is the constant 10.  The reference value
   is always computed in double with __builtin_fma, whatever TYPE is; the
   inputs are small integers (|result| <= 14 * 19 + 26), so this should be
   exact for every TYPE.  */
#define TEST_LOOP(INV, TYPE, CMPTYPE, SUFFIX) \
  { \
    TYPE a[N], b[N], c[N], d[N]; \
    CMPTYPE cond[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        b[i] = i % 15; \
        c[i] = i % 9 + 11; \
        d[i] = i % 13 + 14; \
        cond[i] = i % 17; \
        asm volatile ("" ::: "memory"); \
      } \
    f_##INV##_##SUFFIX (a, b, c, d, cond); \
    for (int i = 0; i < N; ++i) \
      { \
        double mb = (INV & 1 ? -b[i] : b[i]); \
        double mc = c[i]; \
        double md = (INV & 2 ? -d[i] : d[i]); \
        double fma = __builtin_fma (mb, mc, md); \
        double truev = (INV & 4 ? -fma : fma); \
        if (a[i] != (i % 17 < 10 ? truev : 10)) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_INV (TEST_LOOP)
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue