sve: combine inverted masks into NOTs
The following example

void f10(double * restrict z, double * restrict w, double * restrict x,
         double * restrict y, int n)
{
    for (int i = 0; i < n; i++) {
        z[i] = (w[i] > 0) ? x[i] + w[i] : y[i] - w[i];
    }
}

currently generates:

        ld1d    z1.d, p1/z, [x1, x5, lsl 3]
        fcmgt   p2.d, p1/z, z1.d, #0.0
        fcmgt   p0.d, p3/z, z1.d, #0.0
        ld1d    z2.d, p2/z, [x2, x5, lsl 3]
        bic     p0.b, p3/z, p1.b, p0.b
        ld1d    z0.d, p0/z, [x3, x5, lsl 3]

Here a BIC is generated between p1 and p0, where a NOT would be better: it avoids
the use of p3 and opens the pattern up to being CSEd.  After this patch, using a
2 -> 2 split, we generate:

        ld1d    z1.d, p0/z, [x1, x5, lsl 3]
        fcmgt   p2.d, p0/z, z1.d, #0.0
        not     p1.b, p0/z, p2.b

The additional scratch register is needed so that the two operations can be CSEd.
If both statements wrote to the same register, CSE could not combine the values
when other statements in between also use that register.

A second pattern is needed to capture the NOR case, since combine matches the
longest sequence first.  Without this pattern we would de-optimize NOR and emit
two NOTs instead.  I did not find a better way to do this.

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>_bic_combine,
        *fcm<cmp_op><mode>_nor_combine, *fcmuo<mode>_bic_combine,
        *fcmuo<mode>_nor_combine): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/pred-not-gen-1.c: New test.
        * gcc.target/aarch64/sve/pred-not-gen-2.c: New test.
        * gcc.target/aarch64/sve/pred-not-gen-3.c: New test.
        * gcc.target/aarch64/sve/pred-not-gen-4.c: New test.
parent a1a7d09430 · commit e36206c994
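As a rough illustration of the NOR case the second pattern guards against, consider a hypothetical loop (the function name is illustrative and not part of this patch; whether the vectoriser produces exactly the RTL shape matched by *fcm<cmp_op><mode>_nor_combine depends on the rest of the pipeline, and the compile flags assumed here are something like -O3 -march=armv8.2-a+sve):

/* Hypothetical sketch: the else branch runs on lanes where neither
   comparison holds, so its mask is a NOR of the two compare results
   under the loop predicate.  That gives combine a NOT feeding operand 4
   of the BIC form described above.  */
void f_nor (double * restrict z, double * restrict w,
            double * restrict x, double * restrict y, int n)
{
  for (int i = 0; i < n; i++)
    z[i] = (w[i] > 0 || x[i] > 0) ? w[i] + x[i] : y[i] - w[i];
}

With the *_nor_combine patterns the split keeps the single NOR-style mask; without them the same input would go through the BIC pattern and end up as two NOTs.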
@@ -8126,6 +8126,160 @@
	  UNSPEC_COND_FCMUO))]
)

;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
;; In this case, we still need a separate NOT/BIC operation, but predicating
;; the comparison on the BIC operand removes the need for a PTRUE.
(define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
		SVE_COND_FP_CMP_I0))
	    (match_operand:<VPRED> 4 "register_operand" "Upa"))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_COND_FP_CMP_I0))
   (set (match_dup 0)
	(and:<VPRED>
	  (not:<VPRED>
	    (match_dup 5))
	  (match_dup 4)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)

;; Make sure that we expand to a nor when the operand 4 of
;; *fcm<cmp_op><mode>_bic_combine is a not.
(define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
		SVE_COND_FP_CMP_I0))
	    (not:<VPRED>
	      (match_operand:<VPRED> 4 "register_operand" "Upa")))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_COND_FP_CMP_I0))
   (set (match_dup 0)
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (match_dup 5))
	    (not:<VPRED>
	      (match_dup 4)))
	  (match_dup 1)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)

(define_insn_and_split "*fcmuo<mode>_bic_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
		UNSPEC_COND_FCMUO))
	    (match_operand:<VPRED> 4 "register_operand" "Upa"))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_COND_FCMUO))
   (set (match_dup 0)
	(and:<VPRED>
	  (not:<VPRED>
	    (match_dup 5))
	  (match_dup 4)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)

;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_nor_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
		UNSPEC_COND_FCMUO))
	    (not:<VPRED>
	      (match_operand:<VPRED> 4 "register_operand" "Upa")))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_COND_FCMUO))
   (set (match_dup 0)
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (match_dup 5))
	    (not:<VPRED>
	      (match_dup 4)))
	  (match_dup 1)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)

;; -------------------------------------------------------------------------
;; ---- [FP] Absolute comparisons
;; -------------------------------------------------------------------------
gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c (new file, 23 lines)
@@ -0,0 +1,23 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O3 --save-temps" } */

/*
** f10:
** ...
**	ld1d	z1.d, p0/z, \[x1, x5, lsl 3\]
**	fcmgt	p2.d, p0/z, z1.d, #0.0
**	ld1d	z2.d, p2/z, \[x2, x5, lsl 3\]
**	not	p1.b, p0/z, p2.b
**	ld1d	z0.d, p1/z, \[x3, x5, lsl 3\]
** ...
*/

void f10(double * restrict z, double * restrict w, double * restrict x, double * restrict y, int n)
{
    for (int i = 0; i < n; i++) {
        z[i] = (w[i] > 0) ? x[i] + w[i] : y[i] - w[i];
    }
}

/* { dg-final { scan-assembler-not {\tbic\t} } } */
/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */
gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c (new file, 23 lines)
@@ -0,0 +1,23 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O3 --save-temps" } */

/*
** f11:
** ...
**	ld1d	z0.d, p0/z, \[x1, x2, lsl 3\]
**	fcmgt	p2.d, p3/z, z0.d, #0.0
**	fcmgt	p1.d, p0/z, z0.d, #0.0
**	not	p1.b, p0/z, p1.b
**	ld1d	z1.d, p1/z, \[x3, x2, lsl 3\]
** ...
*/

void f11(double * restrict z, double * restrict w, double * restrict x, double * restrict y, int n)
{
    for (int i = 0; i < n; i++) {
        z[i] = (w[i] > 0) ? w[i] : y[i];
    }
}

/* { dg-final { scan-assembler-not {\tbic\t} } } */
/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */
gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c (new file, 21 lines)
@@ -0,0 +1,21 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O3 --save-temps" } */

/*
** f12:
** ...
**	ld1w	z1.s, p0/z, \[x1, x2, lsl 2\]
**	cmple	p1.s, p0/z, z1.s, #0
**	ld1w	z0.s, p1/z, \[x3, x2, lsl 2\]
** ...
*/

void f12(int * restrict z, int * restrict w, int * restrict x, int * restrict y, int n)
{
    for (int i = 0; i < n; i++) {
        z[i] = (w[i] > 0) ? w[i] : y[i];
    }
}

/* { dg-final { scan-assembler-not {\tbic\t} } } */
/* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} } } */
gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c (new file, 14 lines)
@@ -0,0 +1,14 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O3 --save-temps" } */

#include <math.h>

void f13(double * restrict z, double * restrict w, double * restrict x, double * restrict y, int n)
{
    for (int i = 0; i < n; i++) {
        z[i] = (isunordered(w[i], 0)) ? x[i] + w[i] : y[i] - w[i];
    }
}

/* { dg-final { scan-assembler-not {\tbic\t} } } */
/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */