[AArch64] Rewrite vabs<q>_s<8,16,32,64> AdvSIMD intrinsics to fold to tree

gcc/
	* config/aarch64/aarch64-builtins.c
	(aarch64_fold_builtin): Fold abs in all modes.
	* config/aarch64/aarch64-simd-builtins.def
	(abs): Enable for all modes.
	* config/aarch64/arm_neon.h
	(vabs<q>_s<8,16,32,64>): Rewrite using builtins.
	(vabs_f64): Add missing intrinsic.

gcc/testsuite/
	* gcc.target/aarch64/vabs_intrinsic_1.c: New file.

From-SVN: r201083
Author:    James Greenhalgh <james.greenhalgh@arm.com>
Committer: James Greenhalgh
Date:      2013-07-20 07:56:41 +00:00
Commit:    d05d070973 (parent 7a29a1b527)

6 changed files with 165 additions and 79 deletions
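Because the rewritten intrinsics now expand to builtins that aarch64_fold_builtin lowers to a plain ABS_EXPR, the mid-end can treat a vector absolute value like ordinary arithmetic instead of an opaque inline asm block. A minimal sketch of the kind of payoff this enables (the function is illustrative; whether a given simplification fires depends on the release's tree optimizers):

#include <arm_neon.h>

/* With the old asm-based vabsq_s32, GCC had to emit a separate neg
   and abs here.  Once the builtin folds to ABS_EXPR, the optimizers
   are free to apply abs(-x) == abs(x) and drop the negation.  */
int32x4_t
abs_of_neg (int32x4_t x)
{
  return vabsq_s32 (-x);
}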

--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2013-07-20  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_fold_builtin): Fold abs in all modes.
+	* config/aarch64/aarch64-simd-builtins.def
+	(abs): Enable for all modes.
+	* config/aarch64/arm_neon.h
+	(vabs<q>_s<8,16,32,64>): Rewrite using builtins.
+	(vabs_f64): Add missing intrinsic.
+
 2013-07-19  Ian Bolton  <ian.bolton@arm.com>
 
 	* config/aarch64/arm_neon.h (vabs_s64): New function.

--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1325,7 +1325,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
 
   switch (fcode)
     {
-      BUILTIN_VDQF (UNOP, abs, 2)
+      BUILTIN_VALLDI (UNOP, abs, 2)
 	return fold_build1 (ABS_EXPR, type, args[0]);
 	break;
       BUILTIN_VALLDI (BINOP, cmge, 0)
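Each BUILTIN_* macro in this switch expands to the case labels for one builtin across a set of machine modes, so widening abs from VDQF (the vector float modes) to VALLDI (by its name, all the vector modes plus DI) is the whole fix: every integer vabs builtin now reaches the fold_build1 (ABS_EXPR, ...) line. One way to observe the fold from user code (the dump flag is real; exactly which dump first shows the folded form is an assumption):

#include <arm_neon.h>

/* Build with: gcc -O2 -fdump-tree-gimple -c observe_fold.c
   After folding, the dump should show an ABS_EXPR rather than a call
   to __builtin_aarch64_absv4si.  */
int32x4_t
observe_fold (int32x4_t x)
{
  return vabsq_s32 (x);
}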

--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -347,7 +347,7 @@
   BUILTIN_VDQF (UNOP, frecpe, 0)
   BUILTIN_VDQF (BINOP, frecps, 0)
 
-  BUILTIN_VDQF (UNOP, abs, 2)
+  BUILTIN_VALLDI (UNOP, abs, 2)
 
   VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
   VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
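Each line in this file registers one __builtin_aarch64_<name><mode> function per mode in the iterator; those names are exactly what the arm_neon.h hunks below call. The integer instances this change makes available, with signatures inferred from their call sites there (the VALLDI set also covers the float vector modes and plain DI):

int8x8_t  __builtin_aarch64_absv8qi  (int8x8_t);
int16x4_t __builtin_aarch64_absv4hi  (int16x4_t);
int32x2_t __builtin_aarch64_absv2si  (int32x2_t);
int8x16_t __builtin_aarch64_absv16qi (int8x16_t);
int16x8_t __builtin_aarch64_absv8hi  (int16x8_t);
int32x4_t __builtin_aarch64_absv4si  (int32x4_t);
int64x2_t __builtin_aarch64_absv2di  (int64x2_t);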

--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4468,83 +4468,6 @@ vabds_f32 (float32_t a, float32_t b)
   return result;
 }
 
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vabs_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("abs %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vabs_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("abs %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vabs_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("abs %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vabsq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("abs %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vabsq_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("abs %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vabsq_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("abs %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vabsq_s64 (int64x2_t a)
-{
-  int64x2_t result;
-  __asm__ ("abs %0.2d,%1.2d"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddlv_s8 (int8x8_t a)
 {
@@ -17395,6 +17318,30 @@ vabs_f32 (float32x2_t __a)
   return __builtin_aarch64_absv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vabs_f64 (float64x1_t __a)
+{
+  return __builtin_fabs (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vabs_s8 (int8x8_t __a)
+{
+  return __builtin_aarch64_absv8qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vabs_s16 (int16x4_t __a)
+{
+  return __builtin_aarch64_absv4hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vabs_s32 (int32x2_t __a)
+{
+  return __builtin_aarch64_absv2si (__a);
+}
+
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vabs_s64 (int64x1_t __a)
 {
@@ -17413,6 +17360,30 @@ vabsq_f64 (float64x2_t __a)
   return __builtin_aarch64_absv2df (__a);
 }
 
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vabsq_s8 (int8x16_t __a)
+{
+  return __builtin_aarch64_absv16qi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabsq_s16 (int16x8_t __a)
+{
+  return __builtin_aarch64_absv8hi (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabsq_s32 (int32x4_t __a)
+{
+  return __builtin_aarch64_absv4si (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vabsq_s64 (int64x2_t __a)
+{
+  return __builtin_aarch64_absv2di (__a);
+}
+
 /* vadd */
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
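One detail worth noting in the hunks above: unlike its integer siblings, vabs_f64 lowers to the generic scalar __builtin_fabs. That relies on float64x1_t being a single-element type that, in the headers of this era, is effectively a plain double (an assumption about the 2013 definitions; later releases made it a real one-lane vector type). A minimal usage sketch:

#include <arm_neon.h>

/* Folds to a scalar fabs; on AArch64 this should come out as a single
   "fabs d0, d0"-style instruction.  */
float64x1_t
scalar_abs (float64x1_t x)
{
  return vabs_f64 (x);
}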

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-07-20  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/vabs_intrinsic_1.c: New file.
+
 2013-07-20  Joern Rennecke  <joern.rennecke@embecosm.com>
 
 	* gcc.dg/pr57154.c: Add dg-require-effective-target scheduling.

--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define ETYPE(size) int##size##_t
+#define VTYPE(size, lanes) int##size##x##lanes##_t
+
+#define TEST_VABS(q, size, lanes)                        \
+static void                                              \
+test_vabs##q##_##size (ETYPE (size) * res,               \
+                       const ETYPE (size) *in1)          \
+{                                                        \
+  VTYPE (size, lanes) a = vld1##q##_s##size (res);       \
+  VTYPE (size, lanes) b = vld1##q##_s##size (in1);       \
+  a = vabs##q##_s##size (b);                             \
+  vst1##q##_s##size (res, a);                            \
+}
+
+#define BUILD_VARS(width, n_lanes, n_half_lanes)         \
+TEST_VABS (, width, n_half_lanes)                        \
+TEST_VABS (q, width, n_lanes)                            \
+
+BUILD_VARS (64, 2, 1)
+BUILD_VARS (32, 4, 2)
+BUILD_VARS (16, 8, 4)
+BUILD_VARS (8, 16, 8)
+
+#define POOL1 {-10}
+#define POOL2 {2, -10}
+#define POOL4 {0, -10, 2, -3}
+#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70}
+#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \
+                -5, 10, -2, 3, -4, 50, -6, 70}
+
+#define EXPECTED1 {10}
+#define EXPECTED2 {2, 10}
+#define EXPECTED4 {0, 10, 2, 3}
+#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70}
+#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \
+                    5, 10, 2, 3, 4, 50, 6, 70}
+
+#define BUILD_TEST(size, lanes_64, lanes_128)            \
+static void                                              \
+test_##size (void)                                       \
+{                                                        \
+  int i;                                                 \
+  ETYPE (size) pool1[lanes_64] = POOL##lanes_64;         \
+  ETYPE (size) res1[lanes_64] = {0};                     \
+  ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \
+  ETYPE (size) pool2[lanes_128] = POOL##lanes_128;       \
+  ETYPE (size) res2[lanes_128] = {0};                    \
+  ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \
+                                                         \
+  /* Forcefully avoid optimization.  */                  \
+  asm volatile ("" : : : "memory");                      \
+  test_vabs_##size (res1, pool1);                        \
+  for (i = 0; i < lanes_64; i++)                         \
+    if (res1[i] != expected1[i])                         \
+      abort ();                                          \
+                                                         \
+  /* Forcefully avoid optimization.  */                  \
+  asm volatile ("" : : : "memory");                      \
+  test_vabsq_##size (res2, pool2);                       \
+  for (i = 0; i < lanes_128; i++)                        \
+    if (res2[i] != expected2[i])                         \
+      abort ();                                          \
+}
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+BUILD_TEST (8 , 8, 16)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+BUILD_TEST (16, 4, 8)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+BUILD_TEST (32, 2, 4)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+BUILD_TEST (64, 1, 2)
+
+#undef BUILD_TEST
+#define BUILD_TEST(size) test_##size ()
+
+int
+main (int argc, char **argv)
+{
+  BUILD_TEST (8);
+  BUILD_TEST (16);
+  BUILD_TEST (32);
+  BUILD_TEST (64);
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
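A note on the test's mechanics: --save-temps keeps the generated .s file so the scan-assembler-times directives can verify that each vector width emits its abs instruction exactly once even under -O3, and cleanup-saved-temps removes the leftover files afterwards. To run just this test from a GCC build tree, the usual DejaGnu invocation should work (exact flags assumed to match your setup):

make check-gcc RUNTESTFLAGS="aarch64.exp=vabs_intrinsic_1.c"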