[ARM,AArch64] more poly64 intrinsics and tests
2017-02-06 Christophe Lyon <christophe.lyon@linaro.org> gcc/ * config/aarch64/arm_neon.h (vtst_p8): Rewrite without asm. (vtst_p16): Likewise. (vtstq_p8): Likewise. (vtstq_p16): Likewise. (vtst_p64): New. (vtstq_p64): Likewise. * config/arm/arm_neon.h (vgetq_lane_p64): New. (vset_lane_p64): New. (vsetq_lane_p64): New. 2017-02-06 Christophe Lyon <christophe.lyon@linaro.org> gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1): New. (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2) (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0) (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0) (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3) (vtst_expected_poly64x2): Move to aarch64-only section. (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64) (vtst_p64, vtstq_p64): New tests. From-SVN: r245328
This commit is contained in:
parent
a7c8ed0c36
commit
052ef81d2d
|
@ -1,3 +1,15 @@
|
|||
2017-02-10 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* config/aarch64/arm_neon.h (vtst_p8): Rewrite without asm.
|
||||
(vtst_p16): Likewise.
|
||||
(vtstq_p8): Likewise.
|
||||
(vtstq_p16): Likewise.
|
||||
(vtst_p64): New.
|
||||
(vtstq_p64): Likewise.
|
||||
* config/arm/arm_neon.h (vgetq_lane_p64): New.
|
||||
(vset_lane_p64): New.
|
||||
(vsetq_lane_p64): New.
|
||||
|
||||
2017-02-10 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/79411
|
||||
|
|
|
@ -10862,48 +10862,47 @@ __extension__ extern __inline uint8x8_t
|
|||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vtst_p8 (poly8x8_t a, poly8x8_t b)
|
||||
{
|
||||
uint8x8_t result;
|
||||
__asm__ ("cmtst %0.8b, %1.8b, %2.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a), "w"(b)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b))
|
||||
!= 0);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vtst_p16 (poly16x4_t a, poly16x4_t b)
|
||||
{
|
||||
uint16x4_t result;
|
||||
__asm__ ("cmtst %0.4h, %1.4h, %2.4h"
|
||||
: "=w"(result)
|
||||
: "w"(a), "w"(b)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b))
|
||||
!= 0);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vtst_p64 (poly64x1_t a, poly64x1_t b)
|
||||
{
|
||||
return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vtstq_p8 (poly8x16_t a, poly8x16_t b)
|
||||
{
|
||||
uint8x16_t result;
|
||||
__asm__ ("cmtst %0.16b, %1.16b, %2.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a), "w"(b)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b))
|
||||
!= 0);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vtstq_p16 (poly16x8_t a, poly16x8_t b)
|
||||
{
|
||||
uint16x8_t result;
|
||||
__asm__ ("cmtst %0.8h, %1.8h, %2.8h"
|
||||
: "=w"(result)
|
||||
: "w"(a), "w"(b)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b))
|
||||
!= 0);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vtstq_p64 (poly64x2_t a, poly64x2_t b)
|
||||
{
|
||||
return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b))
|
||||
!= __AARCH64_INT64_C (0));
|
||||
}
|
||||
|
||||
/* End of temporary inline asm implementations. */
|
||||
|
|
|
@ -6309,6 +6309,16 @@ vgetq_lane_s64 (int64x2_t __a, const int __b)
|
|||
return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
|
||||
}
|
||||
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("fpu=crypto-neon-fp-armv8")
|
||||
__extension__ extern __inline poly64_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vgetq_lane_p64 (poly64x2_t __a, const int __b)
|
||||
{
|
||||
return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
__extension__ extern __inline uint64_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vgetq_lane_u64 (uint64x2_t __a, const int __b)
|
||||
|
@ -6405,6 +6415,16 @@ vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
|
|||
return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
|
||||
}
|
||||
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("fpu=crypto-neon-fp-armv8")
|
||||
__extension__ extern __inline poly64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c)
|
||||
{
|
||||
return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
__extension__ extern __inline int8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
|
||||
|
@ -6496,6 +6516,13 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
|
|||
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("fpu=crypto-neon-fp-armv8")
|
||||
__extension__ extern __inline poly64x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c)
|
||||
{
|
||||
return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_p64 (uint64_t __a)
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
2017-02-10 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
|
||||
(vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
|
||||
New.
|
||||
(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
|
||||
(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
|
||||
(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
|
||||
(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
|
||||
(vtst_expected_poly64x2): Move to aarch64-only section.
|
||||
(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
|
||||
(vtst_p64, vtstq_p64): New tests.
|
||||
|
||||
2017-02-10 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR c++/71737
|
||||
|
|
|
@ -39,17 +39,6 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
|
|||
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
|
||||
0xfffffffffffffff2 };
|
||||
|
||||
/* Expected results: vmov_n. */
|
||||
VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
|
||||
0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
|
||||
VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
|
||||
0xfffffffffffffff1 };
|
||||
VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
|
||||
VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
|
||||
0xfffffffffffffff2 };
|
||||
|
||||
/* Expected results: vext. */
|
||||
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
|
||||
|
@ -124,6 +113,29 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
|
|||
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
|
||||
0x3333333333333333 };
|
||||
|
||||
/* Expected results: vget_lane. */
|
||||
VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
|
||||
VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
|
||||
|
||||
/* Expected results: vset_lane. */
|
||||
VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
|
||||
VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };
|
||||
|
||||
/* Expected results: vtst. */
|
||||
VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };
|
||||
|
||||
#ifdef __aarch64__
|
||||
/* Expected results: vmov_n. */
|
||||
VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
|
||||
0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
|
||||
VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
|
||||
0xfffffffffffffff1 };
|
||||
VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
|
||||
VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
|
||||
0xfffffffffffffff2 };
|
||||
|
||||
/* Expected results: vldX_lane. */
|
||||
VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
|
||||
|
@ -153,9 +165,9 @@ VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
|
|||
VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
|
||||
0xaaaaaaaaaaaaaaaa };
|
||||
|
||||
/* Expected results: vget_lane. */
|
||||
VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
|
||||
VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
|
||||
/* Expected results: vtst. */
|
||||
VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
|
||||
#endif
|
||||
|
||||
int main (void)
|
||||
{
|
||||
|
@ -727,7 +739,105 @@ int main (void)
|
|||
CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
|
||||
CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
|
||||
|
||||
/* vget_lane_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VGET_LANE/VGETQ_LANE"
|
||||
|
||||
#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
|
||||
VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
|
||||
if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
|
||||
fprintf(stderr, \
|
||||
"ERROR in %s (%s line %d in result '%s') at type %s " \
|
||||
"got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
|
||||
TEST_MSG, __FILE__, __LINE__, \
|
||||
STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
|
||||
STR(VECT_NAME(T1, W, N)), \
|
||||
VECT_VAR(vget_lane_vector, T1, W, N), \
|
||||
VECT_VAR(vget_lane_expected, T1, W, N)); \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
/* Initialize input values. */
|
||||
DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
|
||||
DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);
|
||||
|
||||
VLOAD(vget_lane_vector1, buffer, , poly, p, 64, 1);
|
||||
VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);
|
||||
|
||||
VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
|
||||
VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
|
||||
|
||||
TEST_VGET_LANE( , poly, p, 64, 1, 0);
|
||||
TEST_VGET_LANE(q, poly, p, 64, 2, 0);
|
||||
|
||||
|
||||
/* vset_lane_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VSET_LANE/VSETQ_LANE"
|
||||
|
||||
#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \
|
||||
VECT_VAR(vset_lane_vector, T1, W, N) = \
|
||||
vset##Q##_lane_##T2##W(V, \
|
||||
VECT_VAR(vset_lane_vector, T1, W, N), \
|
||||
L); \
|
||||
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
|
||||
|
||||
/* Initialize input values. */
|
||||
DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
|
||||
DECL_VARIABLE(vset_lane_vector, poly, 64, 2);
|
||||
|
||||
CLEAN(result, uint, 64, 1);
|
||||
CLEAN(result, uint, 64, 2);
|
||||
|
||||
VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
|
||||
VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);
|
||||
|
||||
/* Choose value and lane arbitrarily. */
|
||||
TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
|
||||
TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);
|
||||
|
||||
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
|
||||
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");
|
||||
|
||||
|
||||
/* vtst_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VTST"
|
||||
|
||||
#define TEST_VTST1(INSN, Q, T1, T2, W, N) \
|
||||
VECT_VAR(vtst_vector_res, uint, W, N) = \
|
||||
INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N), \
|
||||
VECT_VAR(vtst_vector2, T1, W, N)); \
|
||||
vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \
|
||||
VECT_VAR(vtst_vector_res, uint, W, N))
|
||||
|
||||
#define TEST_VTST(INSN, Q, T1, T2, W, N) \
|
||||
TEST_VTST1(INSN, Q, T1, T2, W, N) \
|
||||
|
||||
/* Initialize input values. */
|
||||
DECL_VARIABLE(vtst_vector, poly, 64, 1);
|
||||
DECL_VARIABLE(vtst_vector2, poly, 64, 1);
|
||||
DECL_VARIABLE(vtst_vector_res, uint, 64, 1);
|
||||
|
||||
CLEAN(result, uint, 64, 1);
|
||||
|
||||
VLOAD(vtst_vector, buffer, , poly, p, 64, 1);
|
||||
VDUP(vtst_vector2, , poly, p, 64, 1, 5);
|
||||
|
||||
TEST_VTST(vtst, , poly, p, 64, 1);
|
||||
|
||||
CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");
|
||||
|
||||
/* vtstq_p64 is supported by aarch64 only. */
|
||||
#ifdef __aarch64__
|
||||
DECL_VARIABLE(vtst_vector, poly, 64, 2);
|
||||
DECL_VARIABLE(vtst_vector2, poly, 64, 2);
|
||||
DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
|
||||
CLEAN(result, uint, 64, 2);
|
||||
VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
|
||||
VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
|
||||
TEST_VTST(vtst, q, poly, p, 64, 2);
|
||||
CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");
|
||||
|
||||
/* vmov_n_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
|
@ -767,37 +877,6 @@ int main (void)
|
|||
}
|
||||
}
|
||||
|
||||
/* vget_lane_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VGET_LANE/VGETQ_LANE"
|
||||
|
||||
#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
|
||||
VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
|
||||
if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
|
||||
fprintf(stderr, \
|
||||
"ERROR in %s (%s line %d in result '%s') at type %s " \
|
||||
"got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
|
||||
TEST_MSG, __FILE__, __LINE__, \
|
||||
STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
|
||||
STR(VECT_NAME(T1, W, N)), \
|
||||
(uint##W##_t)VECT_VAR(vget_lane_vector, T1, W, N), \
|
||||
(uint##W##_t)VECT_VAR(vget_lane_expected, T1, W, N)); \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
/* Initialize input values. */
|
||||
DECL_VARIABLE(vector, poly, 64, 1);
|
||||
DECL_VARIABLE(vector, poly, 64, 2);
|
||||
|
||||
VLOAD(vector, buffer, , poly, p, 64, 1);
|
||||
VLOAD(vector, buffer, q, poly, p, 64, 2);
|
||||
|
||||
VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
|
||||
VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
|
||||
|
||||
TEST_VGET_LANE( , poly, p, 64, 1, 0);
|
||||
TEST_VGET_LANE(q, poly, p, 64, 2, 0);
|
||||
|
||||
/* vldx_lane_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
|
||||
|
|
Loading…
Reference in New Issue