[AArch64] Rewrite vabs<q>_s<8,16,32,64> AdvSIMD intrinsics to fold to tree
gcc/ * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Fold abs in all modes. * config/aarch64/aarch64-simd-builtins.def (abs): Enable for all modes. * config/aarch64/arm_neon.h (vabs<q>_s<8,16,32,64): Rewrite using builtins. (vabs_f64): Add missing intrinsic. gcc/testsuite/ * gcc.target/aarch64/vabs_intrinsic_1.c: New file. From-SVN: r201083
This commit is contained in:
parent
7a29a1b527
commit
d05d070973
@ -1,3 +1,13 @@
|
||||
2013-07-20 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-builtins.c
|
||||
(aarch64_fold_builtin): Fold abs in all modes.
|
||||
* config/aarch64/aarch64-simd-builtins.def
|
||||
(abs): Enable for all modes.
|
||||
* config/aarch64/arm_neon.h
|
||||
(vabs<q>_s<8,16,32,64): Rewrite using builtins.
|
||||
(vabs_f64): Add missing intrinsic.
|
||||
|
||||
2013-07-19 Ian Bolton <ian.bolton@arm.com>
|
||||
|
||||
* config/aarch64/arm_neon.h (vabs_s64): New function
|
||||
|
@ -1325,7 +1325,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
|
||||
|
||||
switch (fcode)
|
||||
{
|
||||
BUILTIN_VDQF (UNOP, abs, 2)
|
||||
BUILTIN_VALLDI (UNOP, abs, 2)
|
||||
return fold_build1 (ABS_EXPR, type, args[0]);
|
||||
break;
|
||||
BUILTIN_VALLDI (BINOP, cmge, 0)
|
||||
|
@ -347,7 +347,7 @@
|
||||
BUILTIN_VDQF (UNOP, frecpe, 0)
|
||||
BUILTIN_VDQF (BINOP, frecps, 0)
|
||||
|
||||
BUILTIN_VDQF (UNOP, abs, 2)
|
||||
BUILTIN_VALLDI (UNOP, abs, 2)
|
||||
|
||||
VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
|
||||
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
|
||||
|
@ -4468,83 +4468,6 @@ vabds_f32 (float32_t a, float32_t b)
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vabs_s8 (int8x8_t a)
|
||||
{
|
||||
int8x8_t result;
|
||||
__asm__ ("abs %0.8b,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
vabs_s16 (int16x4_t a)
|
||||
{
|
||||
int16x4_t result;
|
||||
__asm__ ("abs %0.4h,%1.4h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
|
||||
vabs_s32 (int32x2_t a)
|
||||
{
|
||||
int32x2_t result;
|
||||
__asm__ ("abs %0.2s,%1.2s"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vabsq_s8 (int8x16_t a)
|
||||
{
|
||||
int8x16_t result;
|
||||
__asm__ ("abs %0.16b,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
|
||||
vabsq_s16 (int16x8_t a)
|
||||
{
|
||||
int16x8_t result;
|
||||
__asm__ ("abs %0.8h,%1.8h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
|
||||
vabsq_s32 (int32x4_t a)
|
||||
{
|
||||
int32x4_t result;
|
||||
__asm__ ("abs %0.4s,%1.4s"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
|
||||
vabsq_s64 (int64x2_t a)
|
||||
{
|
||||
int64x2_t result;
|
||||
__asm__ ("abs %0.2d,%1.2d"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
|
||||
vaddlv_s8 (int8x8_t a)
|
||||
{
|
||||
@ -17395,6 +17318,30 @@ vabs_f32 (float32x2_t __a)
|
||||
return __builtin_aarch64_absv2sf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vabs_f64 (float64x1_t __a)
|
||||
{
|
||||
return __builtin_fabs (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vabs_s8 (int8x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv8qi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
vabs_s16 (int16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv4hi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
|
||||
vabs_s32 (int32x2_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv2si (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
|
||||
vabs_s64 (int64x1_t __a)
|
||||
{
|
||||
@ -17413,6 +17360,30 @@ vabsq_f64 (float64x2_t __a)
|
||||
return __builtin_aarch64_absv2df (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vabsq_s8 (int8x16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv16qi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
|
||||
vabsq_s16 (int16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv8hi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
|
||||
vabsq_s32 (int32x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv4si (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
|
||||
vabsq_s64 (int64x2_t __a)
|
||||
{
|
||||
return __builtin_aarch64_absv2di (__a);
|
||||
}
|
||||
|
||||
/* vadd */
|
||||
|
||||
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
|
||||
|
@ -1,3 +1,7 @@
|
||||
2013-07-20 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/vabs_intrinsic_1.c: New file.
|
||||
|
||||
2013-07-20 Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
|
||||
* gcc.dg/pr57154.c: Add dg-require-effective-target scheduling.
|
||||
|
101
gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
Normal file
101
gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
Normal file
@ -0,0 +1,101 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
#define ETYPE(size) int##size##_t
|
||||
#define VTYPE(size, lanes) int##size##x##lanes##_t
|
||||
|
||||
#define TEST_VABS(q, size, lanes) \
|
||||
static void \
|
||||
test_vabs##q##_##size (ETYPE (size) * res, \
|
||||
const ETYPE (size) *in1) \
|
||||
{ \
|
||||
VTYPE (size, lanes) a = vld1##q##_s##size (res); \
|
||||
VTYPE (size, lanes) b = vld1##q##_s##size (in1); \
|
||||
a = vabs##q##_s##size (b); \
|
||||
vst1##q##_s##size (res, a); \
|
||||
}
|
||||
|
||||
#define BUILD_VARS(width, n_lanes, n_half_lanes) \
|
||||
TEST_VABS (, width, n_half_lanes) \
|
||||
TEST_VABS (q, width, n_lanes) \
|
||||
|
||||
BUILD_VARS (64, 2, 1)
|
||||
BUILD_VARS (32, 4, 2)
|
||||
BUILD_VARS (16, 8, 4)
|
||||
BUILD_VARS (8, 16, 8)
|
||||
|
||||
#define POOL1 {-10}
|
||||
#define POOL2 {2, -10}
|
||||
#define POOL4 {0, -10, 2, -3}
|
||||
#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70}
|
||||
#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \
|
||||
-5, 10, -2, 3, -4, 50, -6, 70}
|
||||
|
||||
#define EXPECTED1 {10}
|
||||
#define EXPECTED2 {2, 10}
|
||||
#define EXPECTED4 {0, 10, 2, 3}
|
||||
#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70}
|
||||
#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \
|
||||
5, 10, 2, 3, 4, 50, 6, 70}
|
||||
|
||||
#define BUILD_TEST(size, lanes_64, lanes_128) \
|
||||
static void \
|
||||
test_##size (void) \
|
||||
{ \
|
||||
int i; \
|
||||
ETYPE (size) pool1[lanes_64] = POOL##lanes_64; \
|
||||
ETYPE (size) res1[lanes_64] = {0}; \
|
||||
ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \
|
||||
ETYPE (size) pool2[lanes_128] = POOL##lanes_128; \
|
||||
ETYPE (size) res2[lanes_128] = {0}; \
|
||||
ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \
|
||||
\
|
||||
/* Forcefully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vabs_##size (res1, pool1); \
|
||||
for (i = 0; i < lanes_64; i++) \
|
||||
if (res1[i] != expected1[i]) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forcefully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vabsq_##size (res2, pool2); \
|
||||
for (i = 0; i < lanes_128; i++) \
|
||||
if (res2[i] != expected2[i]) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
|
||||
BUILD_TEST (8 , 8, 16)
|
||||
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
|
||||
BUILD_TEST (16, 4, 8)
|
||||
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
|
||||
BUILD_TEST (32, 2, 4)
|
||||
|
||||
/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
|
||||
BUILD_TEST (64, 1, 2)
|
||||
|
||||
#undef BUILD_TEST
|
||||
|
||||
#define BUILD_TEST(size) test_##size ()
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
BUILD_TEST (8);
|
||||
BUILD_TEST (16);
|
||||
BUILD_TEST (32);
|
||||
BUILD_TEST (64);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
Loading…
Reference in New Issue
Block a user