Fix off-by-one error in vrev for Neon.

From-SVN: r189069
This commit is contained in:
Ramana Radhakrishnan 2012-06-29 09:48:59 +00:00 committed by Ramana Radhakrishnan
parent 8b8347a537
commit cc05bf43bf
4 changed files with 128 additions and 3 deletions

View File

@ -1,3 +1,9 @@
2012-06-29 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2012-05-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/arm.c (arm_evpc_neon_vrev): Adjust off-by-one error.
2012-06-28 Georg-Johann Lay <avr@gjlay.de>
Backport from 2012-06-28 mainline r189049

View File

@ -25270,10 +25270,18 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
return false;
}
for (i = 0; i < nelt; i += diff)
for (i = 0; i < nelt ; i += diff + 1)
for (j = 0; j <= diff; j += 1)
if (d->perm[i + j] != i + diff - j)
return false;
{
/* This is guaranteed to be true as the value of diff
is 7, 3, 1 and we should have enough elements in the
queue to generate this. Getting a vector mask with a
value of diff other than these values implies that
something is wrong by the time we get here. */
gcc_assert (i + j < nelt);
if (d->perm[i + j] != i + diff - j)
return false;
}
/* Success! */
if (d->testing_p)

View File

@ -1,3 +1,9 @@
2012-06-29 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2012-05-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* gcc.target/arm/neon-vrev.c: New.
2012-06-28 Richard Guenther <rguenther@suse.de>
PR middle-end/53790

View File

@ -0,0 +1,105 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
/* Reverse all four 16-bit lanes of a 64-bit vector; the mask {3,2,1,0}
   should make the compiler emit a vrev64.16 instruction.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint16x4_t
tst_vrev642_u16 (uint16x4_t __a)
{
  uint16x4_t __mask1 = { 3, 2, 1, 0 };
  return __builtin_shuffle (__a, __mask1);
}
/* Reverse the 16-bit lanes within each 64-bit half of a 128-bit vector;
   should compile to vrev64.16 on the quad register.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint16x8_t
tst_vrev64q2_u16 (uint16x8_t __a)
{
  uint16x8_t __mask1 = { 3, 2, 1, 0, 7, 6, 5, 4 };
  return __builtin_shuffle (__a, __mask1);
}
/* Reverse all eight 8-bit lanes of a 64-bit vector; should compile to
   vrev64.8.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint8x8_t
tst_vrev642_u8 (uint8x8_t __a)
{
  uint8x8_t __mask1 = { 7, 6, 5, 4, 3, 2, 1, 0 };
  return __builtin_shuffle (__a, __mask1);
}
/* Reverse the 8-bit lanes within each 64-bit half of a 128-bit vector;
   should compile to vrev64.8 on the quad register.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint8x16_t
tst_vrev64q2_u8 (uint8x16_t __a)
{
  uint8x16_t __mask1 = { 7, 6, 5, 4, 3, 2, 1, 0,
			 15, 14, 13, 12, 11, 10, 9, 8 };
  return __builtin_shuffle (__a, __mask1);
}
/* Swap the two 32-bit lanes of a 64-bit vector; should compile to
   vrev64.32.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint32x2_t
tst_vrev642_u32 (uint32x2_t __a)
{
  uint32x2_t __mask1 = { 1, 0 };
  return __builtin_shuffle (__a, __mask1);
}
/* Swap the 32-bit lanes within each 64-bit half of a 128-bit vector;
   should compile to vrev64.32 on the quad register.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint32x4_t
tst_vrev64q2_u32 (uint32x4_t __a)
{
  uint32x4_t __mask1 = { 1, 0, 3, 2 };
  return __builtin_shuffle (__a, __mask1);
}
/* Swap the 16-bit lanes within each 32-bit word; should compile to
   vrev32.16.  */
uint16x4_t
tst_vrev322_u16 (uint16x4_t __a)
{
  uint16x4_t __swap_pairs = { 1, 0, 3, 2 };
  uint16x4_t __result = __builtin_shuffle (__a, __swap_pairs);
  return __result;
}
/* Swap the 16-bit lanes within each 32-bit word of a 128-bit vector;
   should compile to vrev32.16 on the quad register.  */
uint16x8_t
tst_vrev32q2_u16 (uint16x8_t __a)
{
  uint16x8_t __swap_pairs = { 1, 0, 3, 2, 5, 4, 7, 6 };
  uint16x8_t __result = __builtin_shuffle (__a, __swap_pairs);
  return __result;
}
/* Reverse the 8-bit lanes within each 32-bit word; should compile to
   vrev32.8.  */
uint8x8_t
tst_vrev322_u8 (uint8x8_t __a)
{
  uint8x8_t __rev_words = { 3, 2, 1, 0, 7, 6, 5, 4 };
  uint8x8_t __result = __builtin_shuffle (__a, __rev_words);
  return __result;
}
/* Reverse the 8-bit lanes within each 32-bit word of a 128-bit vector;
   should compile to vrev32.8 on the quad register.  */
uint8x16_t
tst_vrev32q2_u8 (uint8x16_t __a)
{
  uint8x16_t __rev_words = { 3, 2, 1, 0, 7, 6, 5, 4,
			     11, 10, 9, 8, 15, 14, 13, 12 };
  uint8x16_t __result = __builtin_shuffle (__a, __rev_words);
  return __result;
}
/* Swap the 8-bit lanes within each 16-bit halfword; should compile to
   vrev16.8.  */
uint8x8_t
tst_vrev162_u8 (uint8x8_t __a)
{
  uint8x8_t __swap_bytes = { 1, 0, 3, 2, 5, 4, 7, 6 };
  uint8x8_t __result = __builtin_shuffle (__a, __swap_bytes);
  return __result;
}
/* Swap the 8-bit lanes within each 16-bit halfword of a 128-bit vector;
   should compile to vrev16.8 on the quad register.  */
uint8x16_t
tst_vrev16q2_u8 (uint8x16_t __a)
{
  uint8x16_t __swap_bytes = { 1, 0, 3, 2, 5, 4, 7, 6,
			      9, 8, 11, 10, 13, 12, 15, 14 };
  uint8x16_t __result = __builtin_shuffle (__a, __swap_bytes);
  return __result;
}
/* { dg-final {scan-assembler-times "vrev32\.16\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev32\.8\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev16\.8\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev64\.8\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev64\.32\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev64\.16\\t" 2} } */