aarch64: Fix SVE2 BCAX pattern [PR97730]

This patch adds a missing not to the SVE2 BCAX (Bitwise clear and
exclusive or) pattern, fixing the PR. Since SVE doesn't have an
unpredicated not instruction, we need to use a (vacuously) predicated
not here.

To ensure that the predicate is instantiated correctly (to all 1s) for
the intrinsics, we pull out a separate expander from the define_insn.

From the ISA reference [1]:
> Bitwise AND elements of the second source vector with the
> corresponding inverted elements of the third source vector, then
> exclusive OR the results with corresponding elements of the first
> source vector.

[1] : https://developer.arm.com/docs/ddi0602/g/a64-sve-instructions-alphabetic-order/bcax-bitwise-clear-and-exclusive-or

gcc/ChangeLog:

	PR target/97730
	* config/aarch64/aarch64-sve2.md (@aarch64_sve2_bcax<mode>):
	Change to define_expand, add missing (trivially-predicated) not
	rtx to fix wrong code bug.
	(*aarch64_sve2_bcax<mode>): New.

gcc/testsuite/ChangeLog:

	PR target/97730
	* gcc.target/aarch64/sve2/bcax_1.c (OP): Add missing bitwise not
	to match correct bcax semantics.
	* gcc.dg/vect/pr97730.c: New test.
This commit is contained in:
Alex Coplan 2020-11-12 10:03:21 +00:00
parent 64326bb428
commit 7f445b5d61
3 changed files with 41 additions and 4 deletions

View File

@ -786,17 +786,42 @@
;; -------------------------------------------------------------------------
;; Unpredicated exclusive OR of AND with an inverted operand (BCAX).
(define_insn "@aarch64_sve2_bcax<mode>"
;; Expander for BCAX.  The RTL computes
;;   operand 0 = operand 1 ^ (operand 2 & ~operand 3)
;; The NOT of operand 3 is wrapped in an UNSPEC_PRED_X whose predicate is
;; operand 4.  Operand 4 is not supplied by the caller (match_dup): the
;; preparation statement below sets it to CONSTM1_RTX (<VPRED>mode), i.e.
;; an all-ones predicate, so the NOT applies to every element.
(define_expand "@aarch64_sve2_bcax<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_dup 4)
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 3 "register_operand"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
{
;; Instantiate the governing predicate of the NOT as all ones.
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
;; Matching insn for BCAX: operand 0 = operand 1 ^ (operand 2 & ~operand 3),
;; with the NOT of operand 3 wrapped in UNSPEC_PRED_X under operand 4.
;; Operand 4 is matched without a predicate or mode, so any value is
;; accepted there.
;;
;; Alternatives:
;;   1. operand 1 is tied to the output ("0"): a single BCAX.
;;   2. output is an earlyclobber register distinct from the inputs
;;      ("?&w"): MOVPRFX copies operand 1 into the output first, then BCAX;
;;      this alternative is flagged with the "movprfx" attribute.
;;
;; Rewrite step: if operand 4 is not already a constant, it is replaced by
;; CONSTM1_RTX (<VPRED>mode) (all ones).  Presumably this is safe because
;; the emitted BCAX takes no predicate operand -- TODO confirm.
;;
;; NOTE(review): the two match_operand lines directly after the
;; "(and:SVE_FULL_I" line repeat operands 2 and 3, which appear again
;; below; they look like pre-patch lines retained by the diff view --
;; confirm against the real aarch64-sve2.md.
(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
(unspec:SVE_FULL_I
[(match_operand 4)
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
bcax\t%0.d, %0.d, %2.d, %3.d
movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)

View File

@ -0,0 +1,12 @@
/* { dg-additional-options "-O1" } */
/* Run-time regression test for PR target/97730: a plain (b & c) ^ f must
   not be computed with the second AND operand inverted.  */
unsigned b = 0xce8e5a48, c = 0xb849691a;
unsigned a[8080];
/* Fills a[0..4] with (b & c) ^ f and aborts if a[0] is not the expected
   value.  */
int main() {
a[0] = b;
/* Self-assignment has no effect on the value; presumably left over from
   testcase reduction, so keep it as is.  */
c = c;
unsigned f = 0xb1e8;
/* Every iteration stores the same loop-invariant value.  */
for (int h = 0; h < 5; h++)
a[h] = (b & c) ^ f;
/* (0xce8e5a48 & 0xb849691a) ^ 0xb1e8 == 0x8808f9e0.  */
if (a[0] != 0x8808f9e0)
__builtin_abort();
}

View File

@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define OP(x,y,z) ((x) ^ ((y) & (z)))
/* x ^ (~y & z): y is inverted before the AND.  */
#define OP(x,y,z) ((x) ^ (~(y) & (z)))
#include "bitsel_1.c"