Committed on behalf of Matthew Wahab
gcc/ 2017-05-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm_neon.h (vadd_f16): Use standard arithmetic operations in fast-math mode. (vaddq_f16): Likewise. (vmul_f16): Likewise. (vmulq_f16): Likewise. (vsub_f16): Likewise. (vsubq_f16): Likewise. * config/arm/neon.md (add<mode>3): New. (sub<mode>3): New. (fma:<VH:mode>3): New. Also remove outdated comment. (mul<mode>3): New. testsuite/ 2017-05-16 Matthew Wahab <matthew.wahab@arm.com> * gcc.target/arm/armv8_2-fp16-arith-1.c: Expand comment. Update expected output of vadd, vsub and vmul instructions. * gcc.target/arm/armv8_2-fp16-arith-2.c: New. * gcc.target/arm/armv8_2-fp16-neon-2.c: New. * gcc.target/arm/armv8_2-fp16-neon-3.c: New. From-SVN: r248090
This commit is contained in:
parent
d8c9bc3627
commit
6da3785703
@ -1,3 +1,17 @@
|
||||
2017-05-16 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* config/arm/arm_neon.h (vadd_f16): Use standard arithmetic
|
||||
operations in fast-math mode.
|
||||
(vaddq_f16): Likewise.
|
||||
(vmul_f16): Likewise.
|
||||
(vmulq_f16): Likewise.
|
||||
(vsub_f16): Likewise.
|
||||
(vsubq_f16): Likewise.
|
||||
* config/arm/neon.md (add<mode>3): New.
|
||||
(sub<mode>3): New.
|
||||
(fma:<VH:mode>3): New. Also remove outdated comment.
|
||||
(mul<mode>3): New.
|
||||
|
||||
2017-05-16 Martin Liska <mliska@suse.cz>
|
||||
|
||||
PR ipa/79849.
|
||||
|
@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vadd_f16 (float16x4_t __a, float16x4_t __b)
|
||||
{
|
||||
#ifdef __FAST_MATH__
|
||||
return __a + __b;
|
||||
#else
|
||||
return __builtin_neon_vaddv4hf (__a, __b);
|
||||
#endif
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vaddq_f16 (float16x8_t __a, float16x8_t __b)
|
||||
{
|
||||
#ifdef __FAST_MATH__
|
||||
return __a + __b;
|
||||
#else
|
||||
return __builtin_neon_vaddv8hf (__a, __b);
|
||||
#endif
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x4_t
|
||||
@ -17587,7 +17595,11 @@ __extension__ extern __inline float16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vmul_f16 (float16x4_t __a, float16x4_t __b)
|
||||
{
|
||||
#ifdef __FAST_MATH__
|
||||
return __a * __b;
|
||||
#else
|
||||
return __builtin_neon_vmulfv4hf (__a, __b);
|
||||
#endif
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x4_t
|
||||
@ -17608,7 +17620,11 @@ __extension__ extern __inline float16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vmulq_f16 (float16x8_t __a, float16x8_t __b)
|
||||
{
|
||||
#ifdef __FAST_MATH__
|
||||
return __a * __b;
|
||||
#else
|
||||
return __builtin_neon_vmulfv8hf (__a, __b);
|
||||
#endif
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x8_t
|
||||
@ -17804,14 +17820,22 @@ __extension__ extern __inline float16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vsub_f16 (float16x4_t __a, float16x4_t __b)
|
||||
{
|
||||
#ifdef __FAST_MATH__
|
||||
return __a - __b;
|
||||
#else
|
||||
return __builtin_neon_vsubv4hf (__a, __b);
|
||||
#endif
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vsubq_f16 (float16x8_t __a, float16x8_t __b)
|
||||
{
|
||||
#ifdef __FAST_MATH__
|
||||
return __a - __b;
|
||||
#else
|
||||
return __builtin_neon_vsubv8hf (__a, __b);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. */
|
||||
|
@ -505,6 +505,23 @@
|
||||
(const_string "neon_add<q>")))]
|
||||
)
|
||||
|
||||
;; As with SFmode, full support for HFmode vector arithmetic is only available
|
||||
;; when flag-unsafe-math-optimizations is enabled.
|
||||
|
||||
(define_insn "add<mode>3"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
(plus:VH
|
||||
(match_operand:VH 1 "s_register_operand" "w")
|
||||
(match_operand:VH 2 "s_register_operand" "w")))]
|
||||
"TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
|
||||
"vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set (attr "type")
|
||||
(if_then_else (match_test "<Is_float_mode>")
|
||||
(const_string "neon_fp_addsub_s<q>")
|
||||
(const_string "neon_add<q>")))]
|
||||
)
|
||||
|
||||
(define_insn "add<mode>3_fp16"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
@ -557,6 +574,17 @@
|
||||
(const_string "neon_sub<q>")))]
|
||||
)
|
||||
|
||||
(define_insn "sub<mode>3"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
(minus:VH
|
||||
(match_operand:VH 1 "s_register_operand" "w")
|
||||
(match_operand:VH 2 "s_register_operand" "w")))]
|
||||
"TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
|
||||
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set_attr "type" "neon_sub<q>")]
|
||||
)
|
||||
|
||||
(define_insn "sub<mode>3_fp16"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
@ -664,8 +692,17 @@
|
||||
[(set_attr "type" "neon_fp_mla_s<q>")]
|
||||
)
|
||||
|
||||
;; There is limited support for unsafe-math optimizations using the NEON FP16
|
||||
;; arithmetic instructions, so only the intrinsic is currently supported.
|
||||
(define_insn "fma<VH:mode>4"
|
||||
[(set (match_operand:VH 0 "register_operand" "=w")
|
||||
(fma:VH
|
||||
(match_operand:VH 1 "register_operand" "w")
|
||||
(match_operand:VH 2 "register_operand" "w")
|
||||
(match_operand:VH 3 "register_operand" "0")))]
|
||||
"TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
|
||||
"vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set_attr "type" "neon_fp_mla_s<q>")]
|
||||
)
|
||||
|
||||
(define_insn "fma<VH:mode>4_intrinsic"
|
||||
[(set (match_operand:VH 0 "register_operand" "=w")
|
||||
(fma:VH
|
||||
@ -2175,6 +2212,17 @@
|
||||
(const_string "neon_mul_<V_elem_ch><q>")))]
|
||||
)
|
||||
|
||||
(define_insn "mul<mode>3"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
(mult:VH
|
||||
(match_operand:VH 1 "s_register_operand" "w")
|
||||
(match_operand:VH 2 "s_register_operand" "w")))]
|
||||
"TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
|
||||
"vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vmulf<mode>"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
|
@ -1,3 +1,11 @@
|
||||
2016-05-16 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* gcc.target/arm/armv8_2-fp16-arith-1.c: Expand comment. Update
|
||||
expected output of vadd, vsub and vmul instructions.
|
||||
* gcc.target/arm/armv8_2-fp16-arith-2.c: New.
|
||||
* gcc.target/arm/armv8_2-fp16-neon-2.c: New.
|
||||
* gcc.target/arm/armv8_2-fp16-neon-3.c: New.
|
||||
|
||||
2017-05-15 Jerry DeLisle <jvdelisle@gcc.gnu.org>
|
||||
|
||||
PR libgfortran/80727
|
||||
|
@ -3,7 +3,8 @@
|
||||
/* { dg-options "-O2 -ffast-math" } */
|
||||
/* { dg-add-options arm_v8_2a_fp16_neon } */
|
||||
|
||||
/* Test instructions generated for half-precision arithmetic. */
|
||||
/* Test instructions generated for half-precision arithmetic with
|
||||
unsafe-math-optimizations enabled. */
|
||||
|
||||
typedef __fp16 float16_t;
|
||||
typedef __simd64_float16_t float16x4_t;
|
||||
@ -90,9 +91,18 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
|
||||
/* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */
|
||||
/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */
|
||||
|
109
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c
Normal file
109
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c
Normal file
@ -0,0 +1,109 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
|
||||
/* { dg-options "-O2 -fno-fast-math" } */
|
||||
/* { dg-add-options arm_v8_2a_fp16_neon } */
|
||||
|
||||
/* Test instructions generated for half-precision arithmetic without
|
||||
unsafe-math-optimizations. */
|
||||
|
||||
typedef __fp16 float16_t;
|
||||
typedef __simd64_float16_t float16x4_t;
|
||||
typedef __simd128_float16_t float16x8_t;
|
||||
|
||||
typedef short int16x4_t __attribute__ ((vector_size (8)));
|
||||
typedef short int int16x8_t __attribute__ ((vector_size (16)));
|
||||
|
||||
float16_t
|
||||
fp16_abs (float16_t a)
|
||||
{
|
||||
return (a < 0) ? -a : a;
|
||||
}
|
||||
|
||||
#define TEST_UNOP(NAME, OPERATOR, TY) \
|
||||
TY test_##NAME##_##TY (TY a) \
|
||||
{ \
|
||||
return OPERATOR (a); \
|
||||
}
|
||||
|
||||
#define TEST_BINOP(NAME, OPERATOR, TY) \
|
||||
TY test_##NAME##_##TY (TY a, TY b) \
|
||||
{ \
|
||||
return a OPERATOR b; \
|
||||
}
|
||||
|
||||
#define TEST_CMP(NAME, OPERATOR, RTY, TY) \
|
||||
RTY test_##NAME##_##TY (TY a, TY b) \
|
||||
{ \
|
||||
return a OPERATOR b; \
|
||||
}
|
||||
|
||||
/* Scalars. */
|
||||
|
||||
TEST_UNOP (neg, -, float16_t)
|
||||
TEST_UNOP (abs, fp16_abs, float16_t)
|
||||
|
||||
TEST_BINOP (add, +, float16_t)
|
||||
TEST_BINOP (sub, -, float16_t)
|
||||
TEST_BINOP (mult, *, float16_t)
|
||||
TEST_BINOP (div, /, float16_t)
|
||||
|
||||
TEST_CMP (equal, ==, int, float16_t)
|
||||
TEST_CMP (unequal, !=, int, float16_t)
|
||||
TEST_CMP (lessthan, <, int, float16_t)
|
||||
TEST_CMP (greaterthan, >, int, float16_t)
|
||||
TEST_CMP (lessthanequal, <=, int, float16_t)
|
||||
TEST_CMP (greaterthanqual, >=, int, float16_t)
|
||||
|
||||
/* Vectors of size 4. */
|
||||
|
||||
TEST_UNOP (neg, -, float16x4_t)
|
||||
|
||||
TEST_BINOP (add, +, float16x4_t)
|
||||
TEST_BINOP (sub, -, float16x4_t)
|
||||
TEST_BINOP (mult, *, float16x4_t)
|
||||
TEST_BINOP (div, /, float16x4_t)
|
||||
|
||||
TEST_CMP (equal, ==, int16x4_t, float16x4_t)
|
||||
TEST_CMP (unequal, !=, int16x4_t, float16x4_t)
|
||||
TEST_CMP (lessthan, <, int16x4_t, float16x4_t)
|
||||
TEST_CMP (greaterthan, >, int16x4_t, float16x4_t)
|
||||
TEST_CMP (lessthanequal, <=, int16x4_t, float16x4_t)
|
||||
TEST_CMP (greaterthanqual, >=, int16x4_t, float16x4_t)
|
||||
|
||||
/* Vectors of size 8. */
|
||||
|
||||
TEST_UNOP (neg, -, float16x8_t)
|
||||
|
||||
TEST_BINOP (add, +, float16x8_t)
|
||||
TEST_BINOP (sub, -, float16x8_t)
|
||||
TEST_BINOP (mult, *, float16x8_t)
|
||||
TEST_BINOP (div, /, float16x8_t)
|
||||
|
||||
TEST_CMP (equal, ==, int16x8_t, float16x8_t)
|
||||
TEST_CMP (unequal, !=, int16x8_t, float16x8_t)
|
||||
TEST_CMP (lessthan, <, int16x8_t, float16x8_t)
|
||||
TEST_CMP (greaterthan, >, int16x8_t, float16x8_t)
|
||||
TEST_CMP (lessthanequal, <=, int16x8_t, float16x8_t)
|
||||
TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
|
||||
/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */
|
||||
/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, #0} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {vabs\.f16} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {vadd\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vsub\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vmul\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vdiv\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vcmp\.f16} } } */
|
||||
/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */
|
491
gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c
Normal file
491
gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c
Normal file
@ -0,0 +1,491 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
|
||||
/* { dg-options "-O2 -ffast-math" } */
|
||||
/* { dg-add-options arm_v8_2a_fp16_neon } */
|
||||
|
||||
/* Test instructions generated for the FP16 vector intrinsics with
|
||||
-ffast-math */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#define MSTRCAT(L, str) L##str
|
||||
|
||||
#define UNOP_TEST(insn) \
|
||||
float16x4_t \
|
||||
MSTRCAT (test_##insn, _16x4) (float16x4_t a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _f16) (a); \
|
||||
} \
|
||||
float16x8_t \
|
||||
MSTRCAT (test_##insn, _16x8) (float16x8_t a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_f16) (a); \
|
||||
}
|
||||
|
||||
#define BINOP_TEST(insn) \
|
||||
float16x4_t \
|
||||
MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _f16) (a, b); \
|
||||
} \
|
||||
float16x8_t \
|
||||
MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_f16) (a, b); \
|
||||
}
|
||||
|
||||
#define BINOP_LANE_TEST(insn, I) \
|
||||
float16x4_t \
|
||||
MSTRCAT (test_##insn##_lane, _16x4) (float16x4_t a, float16x4_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _lane_f16) (a, b, I); \
|
||||
} \
|
||||
float16x8_t \
|
||||
MSTRCAT (test_##insn##_lane, _16x8) (float16x8_t a, float16x4_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_lane_f16) (a, b, I); \
|
||||
}
|
||||
|
||||
#define BINOP_LANEQ_TEST(insn, I) \
|
||||
float16x4_t \
|
||||
MSTRCAT (test_##insn##_laneq, _16x4) (float16x4_t a, float16x8_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _laneq_f16) (a, b, I); \
|
||||
} \
|
||||
float16x8_t \
|
||||
MSTRCAT (test_##insn##_laneq, _16x8) (float16x8_t a, float16x8_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_laneq_f16) (a, b, I); \
|
||||
} \
|
||||
|
||||
#define BINOP_N_TEST(insn) \
|
||||
float16x4_t \
|
||||
MSTRCAT (test_##insn##_n, _16x4) (float16x4_t a, float16_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _n_f16) (a, b); \
|
||||
} \
|
||||
float16x8_t \
|
||||
MSTRCAT (test_##insn##_n, _16x8) (float16x8_t a, float16_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_n_f16) (a, b); \
|
||||
}
|
||||
|
||||
#define TERNOP_TEST(insn) \
|
||||
float16_t \
|
||||
MSTRCAT (test_##insn, _16) (float16_t a, float16_t b, float16_t c) \
|
||||
{ \
|
||||
return MSTRCAT (insn, h_f16) (a, b, c); \
|
||||
} \
|
||||
float16x4_t \
|
||||
MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b, \
|
||||
float16x4_t c) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _f16) (a, b, c); \
|
||||
} \
|
||||
float16x8_t \
|
||||
MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b, \
|
||||
float16x8_t c) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_f16) (a, b, c); \
|
||||
}
|
||||
|
||||
#define VCMP1_TEST(insn) \
|
||||
uint16x4_t \
|
||||
MSTRCAT (test_##insn, _16x4) (float16x4_t a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _f16) (a); \
|
||||
} \
|
||||
uint16x8_t \
|
||||
MSTRCAT (test_##insn, _16x8) (float16x8_t a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_f16) (a); \
|
||||
}
|
||||
|
||||
#define VCMP2_TEST(insn) \
|
||||
uint16x4_t \
|
||||
MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _f16) (a, b); \
|
||||
} \
|
||||
uint16x8_t \
|
||||
MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_f16) (a, b); \
|
||||
}
|
||||
|
||||
#define VCVT_TEST(insn, TY, TO, FR) \
|
||||
MSTRCAT (TO, 16x4_t) \
|
||||
MSTRCAT (test_##insn, TY) (MSTRCAT (FR, 16x4_t) a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, TY) (a); \
|
||||
} \
|
||||
MSTRCAT (TO, 16x8_t) \
|
||||
MSTRCAT (test_##insn##_q, TY) (MSTRCAT (FR, 16x8_t) a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q##TY) (a); \
|
||||
}
|
||||
|
||||
#define VCVT_N_TEST(insn, TY, TO, FR) \
|
||||
MSTRCAT (TO, 16x4_t) \
|
||||
MSTRCAT (test_##insn##_n, TY) (MSTRCAT (FR, 16x4_t) a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, _n##TY) (a, 1); \
|
||||
} \
|
||||
MSTRCAT (TO, 16x8_t) \
|
||||
MSTRCAT (test_##insn##_n_q, TY) (MSTRCAT (FR, 16x8_t) a) \
|
||||
{ \
|
||||
return MSTRCAT (insn, q_n##TY) (a, 1); \
|
||||
}
|
||||
|
||||
VCMP1_TEST (vceqz)
|
||||
/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-0]+, #0} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
|
||||
|
||||
VCMP1_TEST (vcgtz)
|
||||
/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
|
||||
|
||||
VCMP1_TEST (vcgez)
|
||||
/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
|
||||
|
||||
VCMP1_TEST (vcltz)
|
||||
/* { dg-final { scan-assembler-times {vclt.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vclt.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
|
||||
|
||||
VCMP1_TEST (vclez)
|
||||
/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
|
||||
|
||||
VCVT_TEST (vcvt, _f16_s16, float, int)
|
||||
VCVT_N_TEST (vcvt, _f16_s16, float, int)
|
||||
/* { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+, #1} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+, #1} 1 } } */
|
||||
|
||||
VCVT_TEST (vcvt, _f16_u16, float, uint)
|
||||
VCVT_N_TEST (vcvt, _f16_u16, float, uint)
|
||||
/* { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+, #1} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+, #1} 1 } } */
|
||||
|
||||
VCVT_TEST (vcvt, _s16_f16, int, float)
|
||||
VCVT_N_TEST (vcvt, _s16_f16, int, float)
|
||||
/* { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+, #1} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */
|
||||
|
||||
VCVT_TEST (vcvt, _u16_f16, uint, float)
|
||||
VCVT_N_TEST (vcvt, _u16_f16, uint, float)
|
||||
/* { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+} 2 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+, #1} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */
|
||||
|
||||
VCVT_TEST (vcvta, _s16_f16, int, float)
|
||||
/* { dg-final { scan-assembler-times {vcvta\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvta\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvta, _u16_f16, uint, float)
|
||||
/* { dg-final { scan-assembler-times {vcvta\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvta\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvtm, _s16_f16, int, float)
|
||||
/* { dg-final { scan-assembler-times {vcvtm\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvtm\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvtm, _u16_f16, uint, float)
|
||||
/* { dg-final { scan-assembler-times {vcvtm\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvtm\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvtn, _s16_f16, int, float)
|
||||
/* { dg-final { scan-assembler-times {vcvtn\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvtn\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvtn, _u16_f16, uint, float)
|
||||
/* { dg-final { scan-assembler-times {vcvtn\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvtn\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvtp, _s16_f16, int, float)
|
||||
/* { dg-final { scan-assembler-times {vcvtp\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvtp\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
VCVT_TEST (vcvtp, _u16_f16, uint, float)
|
||||
/* { dg-final { scan-assembler-times {vcvtp\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcvtp\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
|
||||
*/
|
||||
|
||||
UNOP_TEST (vabs)
|
||||
/* { dg-final { scan-assembler-times {vabs\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vabs\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vneg)
|
||||
/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrecpe)
|
||||
/* { dg-final { scan-assembler-times {vrecpe\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrecpe\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrnd)
|
||||
/* { dg-final { scan-assembler-times {vrintz\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrintz\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrnda)
|
||||
/* { dg-final { scan-assembler-times {vrinta\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrinta\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrndm)
|
||||
/* { dg-final { scan-assembler-times {vrintm\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrintm\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrndn)
|
||||
/* { dg-final { scan-assembler-times {vrintn\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrintn\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrndp)
|
||||
/* { dg-final { scan-assembler-times {vrintp\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrintp\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrndx)
|
||||
/* { dg-final { scan-assembler-times {vrintx\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrintx\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
UNOP_TEST (vrsqrte)
|
||||
/* { dg-final { scan-assembler-times {vrsqrte\.f16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrsqrte\.f16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vadd)
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vabd)
|
||||
/* { dg-final { scan-assembler-times {vabd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vabd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcage)
|
||||
/* { dg-final { scan-assembler-times {vacge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vacge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcagt)
|
||||
/* { dg-final { scan-assembler-times {vacgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vacgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcale)
|
||||
/* { dg-final { scan-assembler-times {vacle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vacle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcalt)
|
||||
/* { dg-final { scan-assembler-times {vaclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vaclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vceq)
|
||||
/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcge)
|
||||
/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcgt)
|
||||
/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vcle)
|
||||
/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
VCMP2_TEST (vclt)
|
||||
/* { dg-final { scan-assembler-times {vclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vmax)
|
||||
/* { dg-final { scan-assembler-times {vmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vmax\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vmin)
|
||||
/* { dg-final { scan-assembler-times {vmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vmin\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vmaxnm)
|
||||
/* { dg-final { scan-assembler-times {vmaxnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vmaxnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vminnm)
|
||||
/* { dg-final { scan-assembler-times {vminnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vminnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vmul)
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 3 } }
|
||||
{ dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
BINOP_LANE_TEST (vmul, 2)
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[2\]} 1 } }
|
||||
{ dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[2\]} 1 } } */
|
||||
BINOP_N_TEST (vmul)
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\]} 1 } }
|
||||
{ dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\]} 1 } }*/
|
||||
|
||||
float16x4_t
|
||||
test_vpadd_16x4 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vpadd_f16 (a, b);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vpadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vpmax_16x4 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vpmax_f16 (a, b);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vpmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vpmin_16x4 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vpmin_f16 (a, b);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vpmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vsub)
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vrecps)
|
||||
/* { dg-final { scan-assembler-times {vrecps\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrecps\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
BINOP_TEST (vrsqrts)
|
||||
/* { dg-final { scan-assembler-times {vrsqrts\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrsqrts\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
TERNOP_TEST (vfma)
|
||||
/* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
TERNOP_TEST (vfms)
|
||||
/* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vmov_n_f16 (float16_t a)
|
||||
{
|
||||
return vmov_n_f16 (a);
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vdup_n_f16 (float16_t a)
|
||||
{
|
||||
return vdup_n_f16 (a);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, r[0-9]+} 2 } } */
|
||||
|
||||
float16x8_t
|
||||
test_vmovq_n_f16 (float16_t a)
|
||||
{
|
||||
return vmovq_n_f16 (a);
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vdupq_n_f16 (float16_t a)
|
||||
{
|
||||
return vdupq_n_f16 (a);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, r[0-9]+} 2 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vdup_lane_f16 (float16x4_t a)
|
||||
{
|
||||
return vdup_lane_f16 (a, 1);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, d[0-9]+\[1\]} 1 } } */
|
||||
|
||||
float16x8_t
|
||||
test_vdupq_lane_f16 (float16x4_t a)
|
||||
{
|
||||
return vdupq_lane_f16 (a, 1);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, d[0-9]+\[1\]} 1 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vext_f16 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vext_f16 (a, b, 1);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vext\.16\td[0-9]+, d[0-9]+, d[0-9]+, #1} 1 } } */
|
||||
|
||||
float16x8_t
|
||||
test_vextq_f16 (float16x8_t a, float16x8_t b)
|
||||
{
|
||||
return vextq_f16 (a, b, 1);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vext\.16\tq[0-9]+, q[0-9]+, q[0-9]+, #1} 1 } } */
|
||||
|
||||
UNOP_TEST (vrev64)
|
||||
/* { dg-final { scan-assembler-times {vrev64\.16\td[0-9]+, d[0-9]+} 1 } }
|
||||
{ dg-final { scan-assembler-times {vrev64\.16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vbsl16x4 (uint16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vbsl_f16 (a, b, c);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vbsl\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
float16x8_t
|
||||
test_vbslq16x8 (uint16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vbslq_f16 (a, b, c);
|
||||
}
|
||||
/*{ dg-final { scan-assembler-times {vbsl\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
float16x4x2_t
|
||||
test_vzip16x4 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vzip_f16 (a, b);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vzip\.16\td[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
float16x8x2_t
|
||||
test_vzipq16x8 (float16x8_t a, float16x8_t b)
|
||||
{
|
||||
return vzipq_f16 (a, b);
|
||||
}
|
||||
/*{ dg-final { scan-assembler-times {vzip\.16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
float16x4x2_t
|
||||
test_vuzp16x4 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vuzp_f16 (a, b);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vuzp\.16\td[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
float16x8x2_t
|
||||
test_vuzpq16x8 (float16x8_t a, float16x8_t b)
|
||||
{
|
||||
return vuzpq_f16 (a, b);
|
||||
}
|
||||
/*{ dg-final { scan-assembler-times {vuzp\.16\tq[0-9]+, q[0-9]+} 1 } } */
|
||||
|
||||
float16x4x2_t
|
||||
test_vtrn16x4 (float16x4_t a, float16x4_t b)
|
||||
{
|
||||
return vtrn_f16 (a, b);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vtrn\.16\td[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
float16x8x2_t
|
||||
test_vtrnq16x8 (float16x8_t a, float16x8_t b)
|
||||
{
|
||||
return vtrnq_f16 (a, b);
|
||||
}
|
||||
/*{ dg-final { scan-assembler-times {vtrn\.16\tq[0-9]+, q[0-9]+} 1 } } */
|
108
gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-3.c
Normal file
108
gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-3.c
Normal file
@ -0,0 +1,108 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
|
||||
/* { dg-options "-O2 -ffast-math" } */
|
||||
/* { dg-add-options arm_v8_2a_fp16_neon } */
|
||||
|
||||
/* Test compiler use of FP16 FMA/FMS instructions with -ffast-math. */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
float16x4_t
|
||||
test_vfma_1 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vadd_f16 (vmul_f16 (a, b), c);
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vfma_2 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vsub_f16 (vmul_f16 (a, b), vneg_f16 (c));
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vfma_3 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vsub_f16 (vmul_f16 (vneg_f16 (a), vneg_f16 (b)), vneg_f16 (c));
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vfma_4 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vsub_f16 (vmul_f16 (a, b), vneg_f16 (c));
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */
|
||||
|
||||
float16x8_t
|
||||
test_vfmaq_1 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vaddq_f16 (vmulq_f16 (a, b), c);
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vfmaq_2 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vsubq_f16 (vmulq_f16 (a, b), vnegq_f16 (c));
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vfmaq_3 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vsubq_f16 (vmulq_f16 (vnegq_f16 (a), vnegq_f16 (b)), vnegq_f16 (c));
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vfmaq_4 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vsubq_f16 (vmulq_f16 (a, b), vnegq_f16 (c));
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
|
||||
|
||||
float16x4_t
|
||||
test_vfms_1 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vsub_f16 (c, vmul_f16 (a, b));
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vfms_2 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vsub_f16 (a, vmul_f16 (b, c));
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vfms_3 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vadd_f16 (vmul_f16 (vneg_f16 (a), b), c);
|
||||
}
|
||||
|
||||
float16x4_t
|
||||
test_vfms_4 (float16x4_t a, float16x4_t b, float16x4_t c)
|
||||
{
|
||||
return vadd_f16 (vmul_f16 (a, vneg_f16 (b)), c);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */
|
||||
|
||||
float16x8_t
|
||||
test_vfmsq_1 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vsubq_f16 (c, vmulq_f16 (a, b));
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vfmsq_2 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vsubq_f16 (a, vmulq_f16 (b, c));
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vfmsq_3 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vaddq_f16 (vmulq_f16 (vnegq_f16 (a), b), c);
|
||||
}
|
||||
|
||||
float16x8_t
|
||||
test_vfmsq_4 (float16x8_t a, float16x8_t b, float16x8_t c)
|
||||
{
|
||||
return vaddq_f16 (vmulq_f16 (a, vnegq_f16 (b)), c);
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
|
Loading…
Reference in New Issue
Block a user