PR101609: Use the correct iterator for AArch64 vector right shift pattern

Loops containing long long right shifts by a variable amount fail to
vectorize because the vectorizer cannot find a matching right-shift
pattern for 64-bit elements. The vashr and vlshr patterns in
aarch64-simd.md used the VDQ_BHSI iterator, which leaves out the V2DI
mode; they now use VDQ_I instead.

2021-08-09  Tejas Belagod  <tejas.belagod@arm.com>

gcc/ChangeLog
	PR target/101609
	* config/aarch64/aarch64-simd.md (vlshr<mode>3, vashr<mode>3): Use
	the right iterator.

gcc/testsuite/ChangeLog
	* gcc.target/aarch64/vect-shr-reg.c: New testcase.
	* gcc.target/aarch64/vect-shr-reg-run.c: Likewise.
This commit is contained in:
Tejas Belagod 2021-08-09 11:33:30 +01:00
parent 0095afa82a
commit e2e0b85c1e
3 changed files with 89 additions and 9 deletions

View File

@ -1299,13 +1299,10 @@
DONE;
})
;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
[(match_operand:VDQ_BHSI 0 "register_operand")
(match_operand:VDQ_BHSI 1 "register_operand")
(match_operand:VDQ_BHSI 2 "register_operand")]
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
@ -1333,9 +1330,9 @@
)
(define_expand "vlshr<mode>3"
[(match_operand:VDQ_BHSI 0 "register_operand")
(match_operand:VDQ_BHSI 1 "register_operand")
(match_operand:VDQ_BHSI 2 "register_operand")]
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);

View File

@ -0,0 +1,53 @@
/* { dg-do run } */
/* { dg-options "-O3 -march=armv8.2-a" } */
#include "vect-shr-reg.c"
/* Runtime driver for the shift loops f and g pulled in from
   vect-shr-reg.c: feed them constant inputs and abort if any element
   of either result differs from its reference value.  */
int
main (void)
{
  int64_t a[16], b[16], c[17], res_a[16];
  uint64_t ua[16], ub[16], uc[17], res_ua[16];
  int lane;

  /* Fill each input element together with the result it must produce:
     -2 >> 34 for the signed loop, all-ones >> 52 for the unsigned one.  */
  for (lane = 0; lane < 16; lane++)
    {
      b[lane] = -2;
      c[lane] = 34;
      res_a[lane] = -1LL;

      ub[lane] = 0xffffffffffffffff;
      uc[lane] = 52;
      res_ua[lane] = 0x0fffLL;
    }

  /* Unsigned shift first, then the signed one.  */
  f (ua, ub, uc);
  g (a, b, c);

  /* A single mismatching element fails the test.  */
  for (lane = 0; lane < 16; lane++)
    if (a[lane] != res_a[lane] || ua[lane] != res_ua[lane])
      __builtin_abort ();

  return 0;
}

View File

@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-options "-O3 -march=armv8.2-a" } */
#include <stdint.h>
#include <stdio.h>
#pragma GCC target "+nosve"
/* Unsigned 64-bit right shift by a per-element amount: for each of the
   16 elements, store b[i] >> c[i] into a[i].  The three arrays must not
   overlap (they are __restrict-qualified) and every c[i] must be less
   than 64, since a larger shift count is undefined in C.

   Returns 0 so that the declared int result is always well defined.  */
int __attribute__((noinline))
f(uint64_t *__restrict a, uint64_t *__restrict b, uint64_t *__restrict c)
{
  int i;

  for (i = 0; i < 16; i++)
    a[i] = b[i] >> c[i];

  return 0;
}
/* Signed 64-bit right shift by a per-element amount: for each of the
   16 elements, store b[i] >> c[i] into a[i].  The three arrays must not
   overlap (they are __restrict-qualified) and every c[i] must lie in
   [0, 63].  Right-shifting a negative value is implementation-defined
   in C; GCC performs an arithmetic (sign-propagating) shift.

   Returns 0 so that the declared int result is always well defined.  */
int __attribute__((noinline))
g(int64_t *__restrict a, int64_t *__restrict b, int64_t *__restrict c)
{
  int i;

  for (i = 0; i < 16; i++)
    a[i] = b[i] >> c[i];

  return 0;
}
/* { dg-final { scan-assembler "neg\\tv" } } */
/* { dg-final { scan-assembler "ushl\\tv" } } */
/* { dg-final { scan-assembler "sshl\\tv" } } */