[AArch64] VDUP Testcases

This patch adds vdup intrinsic testcases for AArch64. those testcases
are nice to have, as it allows to reason about vdup consistency for
both LE and BE compiler flavors.

From-SVN: r209713
This commit is contained in:
Alex Velenko 2014-04-23 17:02:49 +00:00 committed by Marcus Shawcroft
parent 36e170203e
commit 0e4d63c5ee
4 changed files with 1398 additions and 0 deletions

View File

@ -1,3 +1,9 @@
2014-04-23 Alex Velenko <Alex.Velenko@arm.com>
* gcc.target/aarch64/vdup_lane_1.c: New testcase.
* gcc.target/aarch64/vdup_lane_2.c: New testcase.
* gcc.target/aarch64/vdup_n_1.c: New testcase.
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/arm/rev16.c: New test.

View File

@ -0,0 +1,430 @@
/* Test vdup_lane intrinsics work correctly. */
/* { dg-do run } */
/* { dg-options "--save-temps -O1" } */
#include <arm_neon.h>
extern void abort (void);
float32x2_t __attribute__ ((noinline))
wrap_vdup_lane_f32_0 (float32x2_t a)
{
return vdup_lane_f32 (a, 0);
}
float32x2_t __attribute__ ((noinline))
wrap_vdup_lane_f32_1 (float32x2_t a)
{
return vdup_lane_f32 (a, 1);
}
int __attribute__ ((noinline))
test_vdup_lane_f32 ()
{
float32x2_t a;
float32x2_t b;
int i;
float32_t c[2] = { 0.0 , 3.14 };
float32_t d[2];
a = vld1_f32 (c);
b = wrap_vdup_lane_f32_0 (a);
vst1_f32 (d, b);
for (i = 0; i < 2; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdup_lane_f32_1 (a);
vst1_f32 (d, b);
for (i = 0; i < 2; i++)
if (c[1] != d[i])
return 1;
return 0;
}
float32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_f32_0 (float32x2_t a)
{
return vdupq_lane_f32 (a, 0);
}
float32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_f32_1 (float32x2_t a)
{
return vdupq_lane_f32 (a, 1);
}
int __attribute__ ((noinline))
test_vdupq_lane_f32 ()
{
float32x2_t a;
float32x4_t b;
int i;
float32_t c[2] = { 0.0 , 3.14 };
float32_t d[4];
a = vld1_f32 (c);
b = wrap_vdupq_lane_f32_0 (a);
vst1q_f32 (d, b);
for (i = 0; i < 4; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdupq_lane_f32_1 (a);
vst1q_f32 (d, b);
for (i = 0; i < 4; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int8x8_t __attribute__ ((noinline))
wrap_vdup_lane_s8_0 (int8x8_t a)
{
return vdup_lane_s8 (a, 0);
}
int8x8_t __attribute__ ((noinline))
wrap_vdup_lane_s8_1 (int8x8_t a)
{
return vdup_lane_s8 (a, 1);
}
int __attribute__ ((noinline))
test_vdup_lane_s8 ()
{
int8x8_t a;
int8x8_t b;
int i;
/* Only two first cases are interesting. */
int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int8_t d[8];
a = vld1_s8 (c);
b = wrap_vdup_lane_s8_0 (a);
vst1_s8 (d, b);
for (i = 0; i < 8; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdup_lane_s8_1 (a);
vst1_s8 (d, b);
for (i = 0; i < 8; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int8x16_t __attribute__ ((noinline))
wrap_vdupq_lane_s8_0 (int8x8_t a)
{
return vdupq_lane_s8 (a, 0);
}
int8x16_t __attribute__ ((noinline))
wrap_vdupq_lane_s8_1 (int8x8_t a)
{
return vdupq_lane_s8 (a, 1);
}
int __attribute__ ((noinline))
test_vdupq_lane_s8 ()
{
int8x8_t a;
int8x16_t b;
int i;
/* Only two first cases are interesting. */
int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int8_t d[16];
a = vld1_s8 (c);
b = wrap_vdupq_lane_s8_0 (a);
vst1q_s8 (d, b);
for (i = 0; i < 16; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdupq_lane_s8_1 (a);
vst1q_s8 (d, b);
for (i = 0; i < 16; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int16x4_t __attribute__ ((noinline))
wrap_vdup_lane_s16_0 (int16x4_t a)
{
return vdup_lane_s16 (a, 0);
}
int16x4_t __attribute__ ((noinline))
wrap_vdup_lane_s16_1 (int16x4_t a)
{
return vdup_lane_s16 (a, 1);
}
int __attribute__ ((noinline))
test_vdup_lane_s16 ()
{
int16x4_t a;
int16x4_t b;
int i;
/* Only two first cases are interesting. */
int16_t c[4] = { 0, 1, 2, 3 };
int16_t d[4];
a = vld1_s16 (c);
b = wrap_vdup_lane_s16_0 (a);
vst1_s16 (d, b);
for (i = 0; i < 4; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdup_lane_s16_1 (a);
vst1_s16 (d, b);
for (i = 0; i < 4; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int16x8_t __attribute__ ((noinline))
wrap_vdupq_lane_s16_0 (int16x4_t a)
{
return vdupq_lane_s16 (a, 0);
}
int16x8_t __attribute__ ((noinline))
wrap_vdupq_lane_s16_1 (int16x4_t a)
{
return vdupq_lane_s16 (a, 1);
}
int __attribute__ ((noinline))
test_vdupq_lane_s16 ()
{
int16x4_t a;
int16x8_t b;
int i;
/* Only two first cases are interesting. */
int16_t c[4] = { 0, 1, 2, 3 };
int16_t d[8];
a = vld1_s16 (c);
b = wrap_vdupq_lane_s16_0 (a);
vst1q_s16 (d, b);
for (i = 0; i < 8; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdupq_lane_s16_1 (a);
vst1q_s16 (d, b);
for (i = 0; i < 8; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int32x2_t __attribute__ ((noinline))
wrap_vdup_lane_s32_0 (int32x2_t a)
{
return vdup_lane_s32 (a, 0);
}
int32x2_t __attribute__ ((noinline))
wrap_vdup_lane_s32_1 (int32x2_t a)
{
return vdup_lane_s32 (a, 1);
}
int __attribute__ ((noinline))
test_vdup_lane_s32 ()
{
int32x2_t a;
int32x2_t b;
int i;
int32_t c[2] = { 0, 1 };
int32_t d[2];
a = vld1_s32 (c);
b = wrap_vdup_lane_s32_0 (a);
vst1_s32 (d, b);
for (i = 0; i < 2; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdup_lane_s32_1 (a);
vst1_s32 (d, b);
for (i = 0; i < 2; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_s32_0 (int32x2_t a)
{
return vdupq_lane_s32 (a, 0);
}
int32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_s32_1 (int32x2_t a)
{
return vdupq_lane_s32 (a, 1);
}
int __attribute__ ((noinline))
test_vdupq_lane_s32 ()
{
int32x2_t a;
int32x4_t b;
int i;
int32_t c[2] = { 0, 1 };
int32_t d[4];
a = vld1_s32 (c);
b = wrap_vdupq_lane_s32_0 (a);
vst1q_s32 (d, b);
for (i = 0; i < 4; i++)
if (c[0] != d[i])
return 1;
b = wrap_vdupq_lane_s32_1 (a);
vst1q_s32 (d, b);
for (i = 0; i < 4; i++)
if (c[1] != d[i])
return 1;
return 0;
}
int64x1_t __attribute__ ((noinline))
wrap_vdup_lane_s64_0 (int64x1_t a)
{
return vdup_lane_s64 (a, 0);
}
int64x1_t __attribute__ ((noinline))
wrap_vdup_lane_s64_1 (int64x1_t a)
{
return vdup_lane_s64 (a, 1);
}
int __attribute__ ((noinline))
test_vdup_lane_s64 ()
{
int64x1_t a;
int64x1_t b;
int64_t c[1];
int64_t d[1];
c[0] = 0;
a = vld1_s64 (c);
b = wrap_vdup_lane_s64_0 (a);
vst1_s64 (d, b);
if (c[0] != d[0])
return 1;
c[0] = 1;
a = vld1_s64 (c);
b = wrap_vdup_lane_s64_1 (a);
vst1_s64 (d, b);
if (c[0] != d[0])
return 1;
return 0;
}
int64x2_t __attribute__ ((noinline))
wrap_vdupq_lane_s64_0 (int64x1_t a)
{
return vdupq_lane_s64 (a, 0);
}
int64x2_t __attribute__ ((noinline))
wrap_vdupq_lane_s64_1 (int64x1_t a)
{
return vdupq_lane_s64 (a, 1);
}
int __attribute__ ((noinline))
test_vdupq_lane_s64 ()
{
int64x1_t a;
int64x2_t b;
int i;
int64_t c[1];
int64_t d[2];
c[0] = 0;
a = vld1_s64 (c);
b = wrap_vdupq_lane_s64_0 (a);
vst1q_s64 (d, b);
for (i = 0; i < 2; i++)
if (c[0] != d[i])
return 1;
c[0] = 1;
a = vld1_s64 (c);
b = wrap_vdupq_lane_s64_1 (a);
vst1q_s64 (d, b);
for (i = 0; i < 2; i++)
if (c[0] != d[i])
return 1;
return 0;
}
int
main ()
{
if (test_vdup_lane_f32 ())
abort ();
if (test_vdup_lane_s8 ())
abort ();
if (test_vdup_lane_s16 ())
abort ();
if (test_vdup_lane_s32 ())
abort ();
if (test_vdup_lane_s64 ())
abort ();
if (test_vdupq_lane_f32 ())
abort ();
if (test_vdupq_lane_s8 ())
abort ();
if (test_vdupq_lane_s16 ())
abort ();
if (test_vdupq_lane_s32 ())
abort ();
if (test_vdupq_lane_s64 ())
abort ();
return 0;
}
/* Asm check for test_vdup_lane_s8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
/* Asm check for test_vdupq_lane_s8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
/* Asm check for test_vdup_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* Asm check for test_vdup_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
/* Asm check for test_vdupq_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* Asm check for test_vdupq_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,343 @@
/* Test vdup_lane intrinsics work correctly. */
/* { dg-do run } */
/* { dg-options "-O1 --save-temps" } */
#include <arm_neon.h>
#define force_simd(V1) asm volatile ("" \
: "=w"(V1) \
: "w"(V1) \
: /* No clobbers */)
extern void abort (void);
float32_t __attribute__ ((noinline))
wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a)
{
return vdups_lane_f32 (a, 0);
}
float32_t __attribute__ ((noinline))
wrap_vdups_lane_f32_1 (float32x2_t a)
{
return vdups_lane_f32 (a, 1);
}
int __attribute__ ((noinline))
test_vdups_lane_f32 ()
{
float32x2_t a;
float32_t b;
float32_t c[2] = { 0.0, 1.0 };
a = vld1_f32 (c);
b = wrap_vdups_lane_f32_0 (a, a);
if (c[0] != b)
return 1;
b = wrap_vdups_lane_f32_1 (a);
if (c[1] != b)
return 1;
return 0;
}
float64_t __attribute__ ((noinline))
wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a)
{
return vdupd_lane_f64 (a, 0);
}
int __attribute__ ((noinline))
test_vdupd_lane_f64 ()
{
float64x1_t a;
float64_t b;
float64_t c[1] = { 0.0 };
a = vld1_f64 (c);
b = wrap_vdupd_lane_f64_0 (a, a);
if (c[0] != b)
return 1;
return 0;
}
int8_t __attribute__ ((noinline))
wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a)
{
int8_t result = vdupb_lane_s8 (a, 0);
force_simd (result);
return result;
}
int8_t __attribute__ ((noinline))
wrap_vdupb_lane_s8_1 (int8x8_t a)
{
int8_t result = vdupb_lane_s8 (a, 1);
force_simd (result);
return result;
}
int __attribute__ ((noinline))
test_vdupb_lane_s8 ()
{
int8x8_t a;
int8_t b;
int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
a = vld1_s8 (c);
b = wrap_vdupb_lane_s8_0 (a, a);
if (c[0] != b)
return 1;
b = wrap_vdupb_lane_s8_1 (a);
if (c[1] != b)
return 1;
return 0;
}
uint8_t __attribute__ ((noinline))
wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a)
{
uint8_t result = vdupb_lane_u8 (a, 0);
force_simd (result);
return result;
}
uint8_t __attribute__ ((noinline))
wrap_vdupb_lane_u8_1 (uint8x8_t a)
{
uint8_t result = vdupb_lane_u8 (a, 1);
force_simd (result);
return result;
}
int __attribute__ ((noinline))
test_vdupb_lane_u8 ()
{
uint8x8_t a;
uint8_t b;
uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
a = vld1_u8 (c);
b = wrap_vdupb_lane_u8_0 (a, a);
if (c[0] != b)
return 1;
b = wrap_vdupb_lane_u8_1 (a);
if (c[1] != b)
return 1;
return 0;
}
int16_t __attribute__ ((noinline))
wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a)
{
int16_t result = vduph_lane_s16 (a, 0);
force_simd (result);
return result;
}
int16_t __attribute__ ((noinline))
wrap_vduph_lane_s16_1 (int16x4_t a)
{
int16_t result = vduph_lane_s16 (a, 1);
force_simd (result);
return result;
}
int __attribute__ ((noinline))
test_vduph_lane_s16 ()
{
int16x4_t a;
int16_t b;
int16_t c[4] = { 0, 1, 2, 3 };
a = vld1_s16 (c);
b = wrap_vduph_lane_s16_0 (a, a);
if (c[0] != b)
return 1;
b = wrap_vduph_lane_s16_1 (a);
if (c[1] != b)
return 1;
return 0;
}
uint16_t __attribute__ ((noinline))
wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a)
{
uint16_t result = vduph_lane_u16 (a, 0);
force_simd (result);
return result;
}
uint16_t __attribute__ ((noinline))
wrap_vduph_lane_u16_1 (uint16x4_t a)
{
uint16_t result = vduph_lane_u16 (a, 1);
force_simd (result);
return result;
}
int __attribute__ ((noinline))
test_vduph_lane_u16 ()
{
uint16x4_t a;
uint16_t b;
uint16_t c[4] = { 0, 1, 2, 3 };
a = vld1_u16 (c);
b = wrap_vduph_lane_u16_0 (a, a);
if (c[0] != b)
return 1;
b = wrap_vduph_lane_u16_1 (a);
if (c[1] != b)
return 1;
return 0;
}
int32_t __attribute__ ((noinline))
wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a)
{
int32_t result = vdups_lane_s32 (a, 0);
force_simd (result);
return result;
}
int32_t __attribute__ ((noinline))
wrap_vdups_lane_s32_1 (int32x2_t a)
{
int32_t result = vdups_lane_s32 (a, 1);
force_simd (result);
return result;
}
int __attribute__ ((noinline))
test_vdups_lane_s32 ()
{
int32x2_t a;
int32_t b;
int32_t c[2] = { 0, 1 };
a = vld1_s32 (c);
b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a);
if (c[0] != b)
return 1;
b = wrap_vdups_lane_s32_1 (a);
if (c[1] != b)
return 1;
return 0;
}
uint32_t __attribute__ ((noinline))
wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a)
{
uint32_t result = vdups_lane_u32 (a, 0);
force_simd (result);
return result;
}
uint32_t __attribute__ ((noinline))
wrap_vdups_lane_u32_1 (uint32x2_t a)
{
uint32_t result = vdups_lane_u32 (a, 1);
force_simd (result);
return result;
}
int __attribute__ ((noinline))
test_vdups_lane_u32 ()
{
uint32x2_t a;
uint32_t b;
uint32_t c[2] = { 0, 1 };
a = vld1_u32 (c);
b = wrap_vdups_lane_u32_0 (a, a);
if (c[0] != b)
return 1;
b = wrap_vdups_lane_u32_1 (a);
if (c[1] != b)
return 1;
return 0;
}
uint64_t __attribute__ ((noinline))
wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a)
{
return vdupd_lane_u64 (a, 0);;
}
int __attribute__ ((noinline))
test_vdupd_lane_u64 ()
{
uint64x1_t a;
uint64_t b;
uint64_t c[1] = { 0 };
a = vld1_u64 (c);
b = wrap_vdupd_lane_u64_0 (a, a);
if (c[0] != b)
return 1;
return 0;
}
int64_t __attribute__ ((noinline))
wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a)
{
return vdupd_lane_u64 (a, 0);
}
int __attribute__ ((noinline))
test_vdupd_lane_s64 ()
{
int64x1_t a;
int64_t b;
int64_t c[1] = { 0 };
a = vld1_s64 (c);
b = wrap_vdupd_lane_s64_0 (a, a);
if (c[0] != b)
return 1;
return 0;
}
int
main ()
{
if (test_vdups_lane_f32 ())
abort ();
if (test_vdupd_lane_f64 ())
abort ();
if (test_vdupb_lane_s8 ())
abort ();
if (test_vdupb_lane_u8 ())
abort ();
if (test_vduph_lane_s16 ())
abort ();
if (test_vduph_lane_u16 ())
abort ();
if (test_vdups_lane_s32 ())
abort ();
if (test_vdups_lane_u32 ())
abort ();
if (test_vdupd_lane_s64 ())
abort ();
if (test_vdupd_lane_u64 ())
abort ();
return 0;
}
/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */
/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */
/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */
/* Asm check for vduph_lane_h16, vduph_lane_h16. */
/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */
/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */
/* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32. */
/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */
/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */
/* Attempts to make the compiler generate vdupd are not practical. */
/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } }
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,619 @@
/* Test vdup_lane intrinsics work correctly. */
/* { dg-do run } */
/* { dg-options "-O1 --save-temps" } */
#include <arm_neon.h>
extern void abort (void);
float32x2_t __attribute__ ((noinline))
wrap_vdup_n_f32 (float32_t a)
{
return vdup_n_f32 (a);
}
int __attribute__ ((noinline))
test_vdup_n_f32 ()
{
float32_t a = 1.0;
float32x2_t b;
float32_t c[2];
int i;
b = wrap_vdup_n_f32 (a);
vst1_f32 (c, b);
for (i = 0; i < 2; i++)
if (a != c[i])
return 1;
return 0;
}
float32x4_t __attribute__ ((noinline))
wrap_vdupq_n_f32 (float32_t a)
{
return vdupq_n_f32 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_f32 ()
{
float32_t a = 1.0;
float32x4_t b;
float32_t c[4];
int i;
b = wrap_vdupq_n_f32 (a);
vst1q_f32 (c, b);
for (i = 0; i < 4; i++)
if (a != c[i])
return 1;
return 0;
}
float64x1_t __attribute__ ((noinline))
wrap_vdup_n_f64 (float64_t a)
{
return vdup_n_f64 (a);
}
int __attribute__ ((noinline))
test_vdup_n_f64 ()
{
float64_t a = 1.0;
float64x1_t b;
float64_t c[1];
int i;
b = wrap_vdup_n_f64 (a);
vst1_f64 (c, b);
for (i = 0; i < 1; i++)
if (a != c[i])
return 1;
return 0;
}
float64x2_t __attribute__ ((noinline))
wrap_vdupq_n_f64 (float64_t a)
{
return vdupq_n_f64 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_f64 ()
{
float64_t a = 1.0;
float64x2_t b;
float64_t c[2];
int i;
b = wrap_vdupq_n_f64 (a);
vst1q_f64 (c, b);
for (i = 0; i < 2; i++)
if (a != c[i])
return 1;
return 0;
}
poly8x8_t __attribute__ ((noinline))
wrap_vdup_n_p8 (poly8_t a)
{
return vdup_n_p8 (a);
}
int __attribute__ ((noinline))
test_vdup_n_p8 ()
{
poly8_t a = 1;
poly8x8_t b;
poly8_t c[8];
int i;
b = wrap_vdup_n_p8 (a);
vst1_p8 (c, b);
for (i = 0; i < 8; i++)
if (a != c[i])
return 1;
return 0;
}
poly8x16_t __attribute__ ((noinline))
wrap_vdupq_n_p8 (poly8_t a)
{
return vdupq_n_p8 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_p8 ()
{
poly8_t a = 1;
poly8x16_t b;
poly8_t c[16];
int i;
b = wrap_vdupq_n_p8 (a);
vst1q_p8 (c, b);
for (i = 0; i < 16; i++)
if (a != c[i])
return 1;
return 0;
}
int8x8_t __attribute__ ((noinline))
wrap_vdup_n_s8 (int8_t a)
{
return vdup_n_s8 (a);
}
int __attribute__ ((noinline))
test_vdup_n_s8 ()
{
int8_t a = 1;
int8x8_t b;
int8_t c[8];
int i;
b = wrap_vdup_n_s8 (a);
vst1_s8 (c, b);
for (i = 0; i < 8; i++)
if (a != c[i])
return 1;
return 0;
}
int8x16_t __attribute__ ((noinline))
wrap_vdupq_n_s8 (int8_t a)
{
return vdupq_n_s8 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_s8 ()
{
int8_t a = 1;
int8x16_t b;
int8_t c[16];
int i;
b = wrap_vdupq_n_s8 (a);
vst1q_s8 (c, b);
for (i = 0; i < 16; i++)
if (a != c[i])
return 1;
return 0;
}
uint8x8_t __attribute__ ((noinline))
wrap_vdup_n_u8 (uint8_t a)
{
return vdup_n_u8 (a);
}
int __attribute__ ((noinline))
test_vdup_n_u8 ()
{
uint8_t a = 1;
uint8x8_t b;
uint8_t c[8];
int i;
b = wrap_vdup_n_u8 (a);
vst1_u8 (c, b);
for (i = 0; i < 8; i++)
if (a != c[i])
return 1;
return 0;
}
uint8x16_t __attribute__ ((noinline))
wrap_vdupq_n_u8 (uint8_t a)
{
return vdupq_n_u8 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_u8 ()
{
uint8_t a = 1;
uint8x16_t b;
uint8_t c[16];
int i;
b = wrap_vdupq_n_u8 (a);
vst1q_u8 (c, b);
for (i = 0; i < 16; i++)
if (a != c[i])
return 1;
return 0;
}
poly16x4_t __attribute__ ((noinline))
wrap_vdup_n_p16 (poly16_t a)
{
return vdup_n_p16 (a);
}
int __attribute__ ((noinline))
test_vdup_n_p16 ()
{
poly16_t a = 1;
poly16x4_t b;
poly16_t c[4];
int i;
b = wrap_vdup_n_p16 (a);
vst1_p16 (c, b);
for (i = 0; i < 4; i++)
if (a != c[i])
return 1;
return 0;
}
poly16x8_t __attribute__ ((noinline))
wrap_vdupq_n_p16 (poly16_t a)
{
return vdupq_n_p16 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_p16 ()
{
poly16_t a = 1;
poly16x8_t b;
poly16_t c[8];
int i;
b = wrap_vdupq_n_p16 (a);
vst1q_p16 (c, b);
for (i = 0; i < 8; i++)
if (a != c[i])
return 1;
return 0;
}
int16x4_t __attribute__ ((noinline))
wrap_vdup_n_s16 (int16_t a)
{
return vdup_n_s16 (a);
}
int __attribute__ ((noinline))
test_vdup_n_s16 ()
{
int16_t a = 1;
int16x4_t b;
int16_t c[4];
int i;
b = wrap_vdup_n_s16 (a);
vst1_s16 (c, b);
for (i = 0; i < 4; i++)
if (a != c[i])
return 1;
return 0;
}
int16x8_t __attribute__ ((noinline))
wrap_vdupq_n_s16 (int16_t a)
{
return vdupq_n_s16 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_s16 ()
{
int16_t a = 1;
int16x8_t b;
int16_t c[8];
int i;
b = wrap_vdupq_n_s16 (a);
vst1q_s16 (c, b);
for (i = 0; i < 8; i++)
if (a != c[i])
return 1;
return 0;
}
uint16x4_t __attribute__ ((noinline))
wrap_vdup_n_u16 (uint16_t a)
{
return vdup_n_u16 (a);
}
int __attribute__ ((noinline))
test_vdup_n_u16 ()
{
uint16_t a = 1;
uint16x4_t b;
uint16_t c[4];
int i;
b = wrap_vdup_n_u16 (a);
vst1_u16 (c, b);
for (i = 0; i < 4; i++)
if (a != c[i])
return 1;
return 0;
}
uint16x8_t __attribute__ ((noinline))
wrap_vdupq_n_u16 (uint16_t a)
{
return vdupq_n_u16 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_u16 ()
{
uint16_t a = 1;
uint16x8_t b;
uint16_t c[8];
int i;
b = wrap_vdupq_n_u16 (a);
vst1q_u16 (c, b);
for (i = 0; i < 8; i++)
if (a != c[i])
return 1;
return 0;
}
int32x2_t __attribute__ ((noinline))
wrap_vdup_n_s32 (int32_t a)
{
return vdup_n_s32 (a);
}
int __attribute__ ((noinline))
test_vdup_n_s32 ()
{
int32_t a = 1;
int32x2_t b;
int32_t c[2];
int i;
b = wrap_vdup_n_s32 (a);
vst1_s32 (c, b);
for (i = 0; i < 2; i++)
if (a != c[i])
return 1;
return 0;
}
int32x4_t __attribute__ ((noinline))
wrap_vdupq_n_s32 (int32_t a)
{
return vdupq_n_s32 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_s32 ()
{
int32_t a = 1;
int32x4_t b;
int32_t c[4];
int i;
b = wrap_vdupq_n_s32 (a);
vst1q_s32 (c, b);
for (i = 0; i < 4; i++)
if (a != c[i])
return 1;
return 0;
}
uint32x2_t __attribute__ ((noinline))
wrap_vdup_n_u32 (uint32_t a)
{
return vdup_n_u32 (a);
}
int __attribute__ ((noinline))
test_vdup_n_u32 ()
{
uint32_t a = 1;
uint32x2_t b;
uint32_t c[2];
int i;
b = wrap_vdup_n_u32 (a);
vst1_u32 (c, b);
for (i = 0; i < 2; i++)
if (a != c[i])
return 1;
return 0;
}
uint32x4_t __attribute__ ((noinline))
wrap_vdupq_n_u32 (uint32_t a)
{
return vdupq_n_u32 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_u32 ()
{
uint32_t a = 1;
uint32x4_t b;
uint32_t c[4];
int i;
b = wrap_vdupq_n_u32 (a);
vst1q_u32 (c, b);
for (i = 0; i < 4; i++)
if (a != c[i])
return 1;
return 0;
}
int64x1_t __attribute__ ((noinline))
wrap_vdup_n_s64 (int64_t a)
{
return vdup_n_s64 (a);
}
int __attribute__ ((noinline))
test_vdup_n_s64 ()
{
int64_t a = 1;
int64x1_t b;
int64_t c[1];
int i;
b = wrap_vdup_n_s64 (a);
vst1_s64 (c, b);
for (i = 0; i < 1; i++)
if (a != c[i])
return 1;
return 0;
}
int64x2_t __attribute__ ((noinline))
wrap_vdupq_n_s64 (int64_t a)
{
return vdupq_n_s64 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_s64 ()
{
int64_t a = 1;
int64x2_t b;
int64_t c[2];
int i;
b = wrap_vdupq_n_s64 (a);
vst1q_s64 (c, b);
for (i = 0; i < 2; i++)
if (a != c[i])
return 1;
return 0;
}
uint64x1_t __attribute__ ((noinline))
wrap_vdup_n_u64 (uint64_t a)
{
return vdup_n_u64 (a);
}
int __attribute__ ((noinline))
test_vdup_n_u64 ()
{
uint64_t a = 1;
uint64x1_t b;
uint64_t c[1];
int i;
b = wrap_vdup_n_u64 (a);
vst1_u64 (c, b);
for (i = 0; i < 1; i++)
if (a != c[i])
return 1;
return 0;
}
uint64x2_t __attribute__ ((noinline))
wrap_vdupq_n_u64 (uint64_t a)
{
return vdupq_n_u64 (a);
}
int __attribute__ ((noinline))
test_vdupq_n_u64 ()
{
uint64_t a = 1;
uint64x2_t b;
uint64_t c[2];
int i;
b = wrap_vdupq_n_u64 (a);
vst1q_u64 (c, b);
for (i = 0; i < 2; i++)
if (a != c[i])
return 1;
return 0;
}
int
main ()
{
if (test_vdup_n_f32 ())
abort ();
if (test_vdup_n_f64 ())
abort ();
if (test_vdup_n_p8 ())
abort ();
if (test_vdup_n_u8 ())
abort ();
if (test_vdup_n_s8 ())
abort ();
if (test_vdup_n_p16 ())
abort ();
if (test_vdup_n_s16 ())
abort ();
if (test_vdup_n_u16 ())
abort ();
if (test_vdup_n_s32 ())
abort ();
if (test_vdup_n_u32 ())
abort ();
if (test_vdup_n_s64 ())
abort ();
if (test_vdup_n_u64 ())
abort ();
if (test_vdupq_n_f32 ())
abort ();
if (test_vdupq_n_f64 ())
abort ();
if (test_vdupq_n_p8 ())
abort ();
if (test_vdupq_n_u8 ())
abort ();
if (test_vdupq_n_s8 ())
abort ();
if (test_vdupq_n_p16 ())
abort ();
if (test_vdupq_n_s16 ())
abort ();
if (test_vdupq_n_u16 ())
abort ();
if (test_vdupq_n_s32 ())
abort ();
if (test_vdupq_n_u32 ())
abort ();
if (test_vdupq_n_s64 ())
abort ();
if (test_vdupq_n_u64 ())
abort ();
return 0;
}
/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64.
Cannot force floating point value in general purpose regester. */
/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
/* Asm check for test_vdup_n_s32, test_vdup_n_u32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out.
Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
are not practical. */
/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
/* { dg-final { cleanup-saved-temps } } */