Fix off-by-one error in vrev for Neon.

From-SVN: r189069
This commit is contained in:
Ramana Radhakrishnan 2012-06-29 09:48:59 +00:00 committed by Ramana Radhakrishnan
parent 8b8347a537
commit cc05bf43bf
4 changed files with 128 additions and 3 deletions

View File

@ -1,3 +1,9 @@
2012-06-29 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2012-05-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/arm.c (arm_evpc_neon_vrev): Adjust off-by-one error.
2012-06-28 Georg-Johann Lay <avr@gjlay.de>
Backport from 2012-06-28 mainline r189049

View File

@ -25270,10 +25270,18 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
return false;
}
for (i = 0; i < nelt; i += diff)
for (i = 0; i < nelt ; i += diff + 1)
for (j = 0; j <= diff; j += 1)
if (d->perm[i + j] != i + diff - j)
return false;
{
/* This is guaranteed to be true as the value of diff
is 7, 3, 1 and we should have enough elements in the
queue to generate this. Getting a vector mask with a
value of diff other than these values implies that
something is wrong by the time we get here. */
gcc_assert (i + j < nelt);
if (d->perm[i + j] != i + diff - j)
return false;
}
/* Success! */
if (d->testing_p)

View File

@ -1,3 +1,9 @@
2012-06-29 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2012-05-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* gcc.target/arm/neon-vrev.c: New.
2012-06-28 Richard Guenther <rguenther@suse.de>
PR middle-end/53790

View File

@ -0,0 +1,105 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include <arm_neon.h>
/* Reverse all four 16-bit lanes of a 64-bit vector; the mask {3,2,1,0}
   should make the compiler emit a vrev64.16 instruction.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint16x4_t
tst_vrev642_u16 (uint16x4_t __a)
{
  uint16x4_t __mask1 = { 3, 2, 1, 0 };
  return __builtin_shuffle (__a, __mask1);
}
/* Reverse the 16-bit lanes within each 64-bit half of a 128-bit vector;
   should compile to vrev64.16 on the quad register.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint16x8_t
tst_vrev64q2_u16 (uint16x8_t __a)
{
  uint16x8_t __mask1 = { 3, 2, 1, 0, 7, 6, 5, 4 };
  return __builtin_shuffle (__a, __mask1);
}
/* Reverse all eight 8-bit lanes of a 64-bit vector; should compile to
   vrev64.8.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint8x8_t
tst_vrev642_u8 (uint8x8_t __a)
{
  uint8x8_t __mask1 = { 7, 6, 5, 4, 3, 2, 1, 0 };
  return __builtin_shuffle (__a, __mask1);
}
/* Reverse the 8-bit lanes within each 64-bit half of a 128-bit vector;
   should compile to vrev64.8 on the quad register.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint8x16_t
tst_vrev64q2_u8 (uint8x16_t __a)
{
  uint8x16_t __mask1 = { 7, 6, 5, 4, 3, 2, 1, 0,
			 15, 14, 13, 12, 11, 10, 9, 8 };
  return __builtin_shuffle (__a, __mask1);
}
/* Swap the two 32-bit lanes of a 64-bit vector; should compile to
   vrev64.32.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint32x2_t
tst_vrev642_u32 (uint32x2_t __a)
{
  uint32x2_t __mask1 = { 1, 0 };
  return __builtin_shuffle (__a, __mask1);
}
/* Swap the 32-bit lanes within each 64-bit half of a 128-bit vector;
   should compile to vrev64.32 on the quad register.
   Fix: removed the unused local __rv (dead variable, -Wunused-variable).  */
uint32x4_t
tst_vrev64q2_u32 (uint32x4_t __a)
{
  uint32x4_t __mask1 = { 1, 0, 3, 2 };
  return __builtin_shuffle (__a, __mask1);
}
/* Swap the 16-bit lanes within each 32-bit word; should compile to
   vrev32.16.  */
uint16x4_t
tst_vrev322_u16 (uint16x4_t __a)
{
  uint16x4_t __swap_pairs = { 1, 0, 3, 2 };
  uint16x4_t __result = __builtin_shuffle (__a, __swap_pairs);
  return __result;
}
/* Swap the 16-bit lanes within each 32-bit word of a 128-bit vector;
   should compile to vrev32.16 on the quad register.  */
uint16x8_t
tst_vrev32q2_u16 (uint16x8_t __a)
{
  uint16x8_t __swap_pairs = { 1, 0, 3, 2, 5, 4, 7, 6 };
  uint16x8_t __result = __builtin_shuffle (__a, __swap_pairs);
  return __result;
}
/* Reverse the 8-bit lanes within each 32-bit word; should compile to
   vrev32.8.  */
uint8x8_t
tst_vrev322_u8 (uint8x8_t __a)
{
  uint8x8_t __rev_words = { 3, 2, 1, 0, 7, 6, 5, 4 };
  uint8x8_t __result = __builtin_shuffle (__a, __rev_words);
  return __result;
}
/* Reverse the 8-bit lanes within each 32-bit word of a 128-bit vector;
   should compile to vrev32.8 on the quad register.  */
uint8x16_t
tst_vrev32q2_u8 (uint8x16_t __a)
{
  uint8x16_t __rev_words = { 3, 2, 1, 0, 7, 6, 5, 4,
			     11, 10, 9, 8, 15, 14, 13, 12 };
  uint8x16_t __result = __builtin_shuffle (__a, __rev_words);
  return __result;
}
/* Swap the 8-bit lanes within each 16-bit halfword; should compile to
   vrev16.8.  */
uint8x8_t
tst_vrev162_u8 (uint8x8_t __a)
{
  uint8x8_t __swap_bytes = { 1, 0, 3, 2, 5, 4, 7, 6 };
  uint8x8_t __result = __builtin_shuffle (__a, __swap_bytes);
  return __result;
}
/* Swap the 8-bit lanes within each 16-bit halfword of a 128-bit vector;
   should compile to vrev16.8 on the quad register.  */
uint8x16_t
tst_vrev16q2_u8 (uint8x16_t __a)
{
  uint8x16_t __swap_bytes = { 1, 0, 3, 2, 5, 4, 7, 6,
			      9, 8, 11, 10, 13, 12, 15, 14 };
  uint8x16_t __result = __builtin_shuffle (__a, __swap_bytes);
  return __result;
}
/* { dg-final {scan-assembler-times "vrev32\.16\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev32\.8\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev16\.8\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev64\.8\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev64\.32\\t" 2} } */
/* { dg-final {scan-assembler-times "vrev64\.16\\t" 2} } */