[AArch64] Rework SVE FP comparisons

This patch rewrites the SVE FP comparisons so that they always use
unspecs and so that they have an additional operand to indicate
whether the predicate is known to be a PTRUE.  It's part of a series
that rewrites the SVE FP patterns so that they can cope with non-PTRUE
predicates.
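
For example, instead of wrapping a plain comparison rtx in
UNSPEC_MERGE_PTRUE:

  (unspec [pred (lt op0 op1)] UNSPEC_MERGE_PTRUE)

the patterns now use one unspec per comparison, with a flag operand
that is SVE_KNOWN_PTRUE when the predicate is known to be a PTRUE and
SVE_MAYBE_NOT_PTRUE otherwise:

  (unspec [pred (const_int SVE_KNOWN_PTRUE) op0 op1] UNSPEC_COND_FCMLT)

(These are just sketches of the rtl shapes; the real patterns use
match_operands, as in the aarch64-sve.md changes below.)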

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (UNSPEC_COND_FCMUO): New unspec.
	(cmp_op): Handle it.
	(SVE_COND_FP_CMP): Rename to...
	(SVE_COND_FP_CMP_I0): ...this.
	(SVE_FP_CMP): Remove.
	* config/aarch64/aarch64-sve.md
	(*fcm<SVE_FP_CMP:cmp_op><SVE_F:mode>): Replace with...
	(*fcm<SVE_COND_FP_CMP_I0:cmp_op><SVE_F:mode>): ...this new pattern,
	using unspecs to represent the comparison.
	(*fcmuo<SVE_F:mode>): Use UNSPEC_COND_FCMUO.
	(*fcm<cmp_op><mode>_and_combine, *fcmuo<mode>_and_combine): Update
	accordingly.
	* config/aarch64/aarch64.c (aarch64_emit_sve_ptrue_op): Delete.
	(aarch64_unspec_cond_code): Move after integer code.  Handle
	UNORDERED.
	(aarch64_emit_sve_predicated_cond): Replace with...
	(aarch64_emit_sve_fp_cond): ...this new function.
	(aarch64_emit_sve_or_conds): Replace with...
	(aarch64_emit_sve_or_fp_conds): ...this new function.
	(aarch64_emit_sve_inverted_cond): Replace with...
	(aarch64_emit_sve_invert_fp_cond): ...this new function.
	(aarch64_expand_sve_vec_cmp_float): Update accordingly.

From-SVN: r274421

--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3136,15 +3136,15 @@
 }
 )
 
-;; Floating-point comparisons predicated with a PTRUE.
+;; Predicated floating-point comparisons.
 (define_insn "*fcm<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
         (unspec:<VPRED>
           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-           (SVE_FP_CMP:<VPRED>
-             (match_operand:SVE_F 2 "register_operand" "w, w")
-             (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
-          UNSPEC_MERGE_PTRUE))]
+           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+           (match_operand:SVE_F 2 "register_operand" "w, w")
+           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+          SVE_COND_FP_CMP_I0))]
   "TARGET_SVE"
   "@
    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
@@ -3156,10 +3156,10 @@
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
         (unspec:<VPRED>
           [(match_operand:<VPRED> 1 "register_operand" "Upl")
-           (unordered:<VPRED>
-             (match_operand:SVE_F 2 "register_operand" "w")
-             (match_operand:SVE_F 3 "register_operand" "w"))]
-          UNSPEC_MERGE_PTRUE))]
+           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+           (match_operand:SVE_F 2 "register_operand" "w")
+           (match_operand:SVE_F 3 "register_operand" "w")]
+          UNSPEC_COND_FCMUO))]
   "TARGET_SVE"
   "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
 )
@@ -3177,20 +3177,21 @@
         (and:<VPRED>
           (unspec:<VPRED>
             [(match_operand:<VPRED> 1)
-             (SVE_FP_CMP
-               (match_operand:SVE_F 2 "register_operand" "w, w")
-               (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
-            UNSPEC_MERGE_PTRUE)
+             (const_int SVE_KNOWN_PTRUE)
+             (match_operand:SVE_F 2 "register_operand" "w, w")
+             (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+            SVE_COND_FP_CMP_I0)
           (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(set (match_dup 0)
-        (and:<VPRED>
-          (SVE_FP_CMP:<VPRED>
-            (match_dup 2)
-            (match_dup 3))
-          (match_dup 4)))]
+        (unspec:<VPRED>
+          [(match_dup 4)
+           (const_int SVE_MAYBE_NOT_PTRUE)
+           (match_dup 2)
+           (match_dup 3)]
+          SVE_COND_FP_CMP_I0))]
 )
 
 ;; Same for unordered comparisons.
@@ -3199,62 +3200,21 @@
         (and:<VPRED>
           (unspec:<VPRED>
             [(match_operand:<VPRED> 1)
-             (unordered
-               (match_operand:SVE_F 2 "register_operand" "w")
-               (match_operand:SVE_F 3 "register_operand" "w"))]
-            UNSPEC_MERGE_PTRUE)
+             (const_int SVE_KNOWN_PTRUE)
+             (match_operand:SVE_F 2 "register_operand" "w")
+             (match_operand:SVE_F 3 "register_operand" "w")]
+            UNSPEC_COND_FCMUO)
           (match_operand:<VPRED> 4 "register_operand" "Upl")))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(set (match_dup 0)
-        (and:<VPRED>
-          (unordered:<VPRED>
-            (match_dup 2)
-            (match_dup 3))
-          (match_dup 4)))]
-)
-
-;; Unpredicated floating-point comparisons, with the results ANDed with
-;; another predicate.  This is a valid fold for the same reasons as above.
-(define_insn "*fcm<cmp_op><mode>_and"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
-        (and:<VPRED>
-          (SVE_FP_CMP:<VPRED>
-            (match_operand:SVE_F 2 "register_operand" "w, w")
-            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
-          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
-  "TARGET_SVE"
-  "@
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Same for unordered comparisons.
-(define_insn "*fcmuo<mode>_and"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
-        (and:<VPRED>
-          (unordered:<VPRED>
-            (match_operand:SVE_F 2 "register_operand" "w")
-            (match_operand:SVE_F 3 "register_operand" "w"))
-          (match_operand:<VPRED> 1 "register_operand" "Upl")))]
-  "TARGET_SVE"
-  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Predicated floating-point comparisons.  We don't need a version
-;; of this for unordered comparisons.
-(define_insn "*pred_fcm<cmp_op><mode>"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
         (unspec:<VPRED>
-          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-           (match_operand:SVE_F 2 "register_operand" "w, w")
-           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
-          SVE_COND_FP_CMP))]
-  "TARGET_SVE"
-  "@
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+          [(match_dup 4)
+           (const_int SVE_MAYBE_NOT_PTRUE)
+           (match_dup 2)
+           (match_dup 3)]
+          UNSPEC_COND_FCMUO))]
 )
 
 ;; -------------------------------------------------------------------------

--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -17700,19 +17700,8 @@ aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
 
       (set TARGET OP)
 
-   given that PTRUE is an all-true predicate of the appropriate mode.  */
-
-static void
-aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
-{
-  rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
-                               gen_rtvec (2, ptrue, op),
-                               UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_set_insn (target, unspec);
-  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
-}
-
-/* Likewise, but also clobber the condition codes.  */
+   given that PTRUE is an all-true predicate of the appropriate mode
+   and that the instruction clobbers the condition codes.  */
 
 static void
 aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
@@ -17724,6 +17713,24 @@ aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
   set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
 }
 
+/* Expand an SVE integer comparison using the SVE equivalent of:
+
+     (set TARGET (CODE OP0 OP1)).  */
+
+void
+aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
+{
+  machine_mode pred_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+
+  if (!aarch64_sve_cmp_operand_p (code, op1))
+    op1 = force_reg (data_mode, op1);
+
+  rtx ptrue = aarch64_ptrue_reg (pred_mode);
+  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
+  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
+}
+
 /* Return the UNSPEC_COND_* code for comparison CODE.  */
 
 static unsigned int
@@ -17743,6 +17750,8 @@ aarch64_unspec_cond_code (rtx_code code)
       return UNSPEC_COND_FCMLE;
     case GE:
       return UNSPEC_COND_FCMGE;
+    case UNORDERED:
+      return UNSPEC_COND_FCMUO;
     default:
       gcc_unreachable ();
     }
@@ -17750,78 +17759,58 @@ aarch64_unspec_cond_code (rtx_code code)
 
 /* Emit:
 
-      (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
+      (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
 
-   where <X> is the operation associated with comparison CODE.  This form
-   of instruction is used when (and (CODE OP0 OP1) PRED) would have different
-   semantics, such as when PRED might not be all-true and when comparing
-   inactive lanes could have side effects.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
-                                  rtx pred, rtx op0, rtx op1)
+aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
+                          bool known_ptrue_p, rtx op0, rtx op1)
 {
+  rtx flag = gen_int_mode (known_ptrue_p, SImode);
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
-                               gen_rtvec (3, pred, op0, op1),
+                               gen_rtvec (4, pred, flag, op0, op1),
                                aarch64_unspec_cond_code (code));
   emit_set_insn (target, unspec);
 }
 
-/* Expand an SVE integer comparison using the SVE equivalent of:
-
-     (set TARGET (CODE OP0 OP1)).  */
-
-void
-aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
-{
-  machine_mode pred_mode = GET_MODE (target);
-  machine_mode data_mode = GET_MODE (op0);
-
-  if (!aarch64_sve_cmp_operand_p (code, op1))
-    op1 = force_reg (data_mode, op1);
-
-  rtx ptrue = aarch64_ptrue_reg (pred_mode);
-  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
-}
-
 /* Emit the SVE equivalent of:
 
-      (set TMP1 (CODE1 OP0 OP1))
-      (set TMP2 (CODE2 OP0 OP1))
+      (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
+      (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
       (set TARGET (ior:PRED_MODE TMP1 TMP2))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <Xi> is the operation associated with comparison CODEi.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
-                           rtx ptrue, rtx op0, rtx op1)
+aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
+                              rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp1 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp1, ptrue,
-                             gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
   rtx tmp2 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp2, ptrue,
-                             gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
   aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
 }
 
 /* Emit the SVE equivalent of:
 
-      (set TMP (CODE OP0 OP1))
+      (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
      (set TARGET (not TMP))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
-                                rtx op0, rtx op1)
+aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
+                                 bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp, ptrue,
-                             gen_rtx_fmt_ee (code, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
   aarch64_emit_unop (target, one_cmpl_optab, tmp);
 }
@@ -17854,14 +17843,13 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
     case NE:
       {
         /* There is native support for the comparison.  */
-        rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-        aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+        aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
         return false;
       }
 
     case LTGT:
       /* This is a trapping operation (LT or GT).  */
-      aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
+      aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
       return false;
 
     case UNEQ:
@@ -17869,7 +17857,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
       {
         /* This would trap for signaling NaNs.  */
         op1 = force_reg (data_mode, op1);
-        aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
+        aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
+                                      ptrue, true, op0, op1);
         return false;
       }
     /* fall through */
@@ -17882,7 +17871,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
           /* Work out which elements are ordered.  */
           rtx ordered = gen_reg_rtx (pred_mode);
           op1 = force_reg (data_mode, op1);
-          aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
+          aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
+                                           ptrue, true, op0, op1);
 
           /* Test the opposite condition for the ordered elements,
              then invert the result.  */
@@ -17892,13 +17882,12 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
           code = reverse_condition_maybe_unordered (code);
           if (can_invert_p)
             {
-              aarch64_emit_sve_predicated_cond (target, code,
-                                                ordered, op0, op1);
+              aarch64_emit_sve_fp_cond (target, code,
+                                        ordered, false, op0, op1);
               return true;
             }
-          rtx tmp = gen_reg_rtx (pred_mode);
-          aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
-          aarch64_emit_unop (target, one_cmpl_optab, tmp);
+          aarch64_emit_sve_invert_fp_cond (target, code,
+                                           ordered, false, op0, op1);
           return false;
         }
       break;
@@ -17916,11 +17905,10 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
   code = reverse_condition_maybe_unordered (code);
   if (can_invert_p)
     {
-      rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-      aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+      aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
       return true;
     }
 
-  aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
+  aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
   return false;
 }

--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -479,6 +479,7 @@
     UNSPEC_COND_FCMLE	; Used in aarch64-sve.md.
     UNSPEC_COND_FCMLT	; Used in aarch64-sve.md.
     UNSPEC_COND_FCMNE	; Used in aarch64-sve.md.
+    UNSPEC_COND_FCMUO	; Used in aarch64-sve.md.
     UNSPEC_COND_FDIV	; Used in aarch64-sve.md.
     UNSPEC_COND_FMAXNM	; Used in aarch64-sve.md.
     UNSPEC_COND_FMINNM	; Used in aarch64-sve.md.
@@ -1273,9 +1274,6 @@
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
-;; SVE floating-point comparisons.
-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt])
-
 ;; -------------------------------------------------------------------
 ;; Code Attributes
 ;; -------------------------------------------------------------------
@@ -1663,12 +1661,13 @@
                               UNSPEC_COND_FNMLA
                               UNSPEC_COND_FNMLS])
 
-(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_FCMEQ
-                                      UNSPEC_COND_FCMGE
-                                      UNSPEC_COND_FCMGT
-                                      UNSPEC_COND_FCMLE
-                                      UNSPEC_COND_FCMLT
-                                      UNSPEC_COND_FCMNE])
+;; SVE FP comparisons that accept #0.0.
+(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ
+                                         UNSPEC_COND_FCMGE
+                                         UNSPEC_COND_FCMGT
+                                         UNSPEC_COND_FCMLE
+                                         UNSPEC_COND_FCMLT
+                                         UNSPEC_COND_FCMNE])
 
 (define_int_iterator FCADD [UNSPEC_FCADD90
                             UNSPEC_FCADD270])
@@ -1955,7 +1954,8 @@
                                 (UNSPEC_COND_FCMGT "gt")
                                 (UNSPEC_COND_FCMLE "le")
                                 (UNSPEC_COND_FCMLT "lt")
-                                (UNSPEC_COND_FCMNE "ne")])
+                                (UNSPEC_COND_FCMNE "ne")
+                                (UNSPEC_COND_FCMUO "uo")])
 
 (define_int_attr sve_int_op [(UNSPEC_ANDV "andv")
                              (UNSPEC_IORV "orv")