[AArch64] Improve arm_neon.h vml<as>_lane handling.
gcc/ * config/aarch64/aarch64-simd-builtins.def (fma): New. * config/aarch64/aarch64-simd.md (aarch64_mla_elt<mode>): New. (aarch64_mla_elt_<vswap_width_name><mode>): Likewise. (aarch64_mls_elt<mode>): Likewise. (aarch64_mls_elt_<vswap_width_name><mode>): Likewise. (aarch64_fma4_elt<mode>): Likewise. (aarch64_fma4_elt_<vswap_width_name><mode>): Likewise. (aarch64_fma4_elt_to_128df): Likewise. (aarch64_fma4_elt_to_64v2df): Likewise. (fnma<mode>4): Likewise. (aarch64_fnma4_elt<mode>): Likewise. (aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise. (aarch64_fnma4_elt_to_128df): Likewise. (aarch64_fnma4_elt_to_64v2df): Likewise. * config/aarch64/iterators.md (VDQSF): New. * config/aarch64/arm_neon.h (vfm<as><sdq>_lane<q>_f<32, 64>): Convert to C implementation. (vml<sa><q>_lane<q>_<fsu><16, 32, 64>): Likewise. gcc/testsuite/ * gcc.target/aarch64/fmla_intrinsic_1.c: New. * gcc.target/aarch64/mla_intrinsic_1.c: Likewise. * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise. * gcc.target/aarch64/mls_intrinsic_1.c: Likewise. From-SVN: r202625
This commit is contained in:
parent
779aea46cc
commit
828e70c1d7
@ -1,3 +1,25 @@
|
||||
2013-09-16 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def (fma): New.
|
||||
* config/aarch64/aarch64-simd.md
|
||||
(aarch64_mla_elt<mode>): New.
|
||||
(aarch64_mla_elt_<vswap_width_name><mode>): Likewise.
|
||||
(aarch64_mls_elt<mode>): Likewise.
|
||||
(aarch64_mls_elt_<vswap_width_name><mode>): Likewise.
|
||||
(aarch64_fma4_elt<mode>): Likewise.
|
||||
(aarch64_fma4_elt_<vswap_width_name><mode>): Likewise.
|
||||
(aarch64_fma4_elt_to_128df): Likewise.
|
||||
(aarch64_fma4_elt_to_64v2df): Likewise.
|
||||
(fnma<mode>4): Likewise.
|
||||
(aarch64_fnma4_elt<mode>): Likewise.
|
||||
(aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
|
||||
(aarch64_fnma4_elt_to_128df): Likewise.
|
||||
(aarch64_fnma4_elt_to_64v2df): Likewise.
|
||||
* config/aarch64/iterators.md (VDQSF): New.
|
||||
* config/aarch64/arm_neon.h
|
||||
(vfm<as><sdq>_lane<q>_f<32, 64>): Convert to C implementation.
|
||||
(vml<sa><q>_lane<q>_<fsu><16, 32, 64>): Likewise.
|
||||
|
||||
2013-09-16 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd.md (aarch64_mul3_elt<mode>): New.
|
||||
|
@ -359,3 +359,6 @@
|
||||
/* Implemented by aarch64_st1<VALL:mode>. */
|
||||
BUILTIN_VALL (STORE1, st1, 0)
|
||||
|
||||
/* Implemented by fma<mode>4. */
|
||||
BUILTIN_VDQF (TERNOP, fma, 4)
|
||||
|
||||
|
@ -1070,6 +1070,38 @@
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_mla_elt<mode>"
|
||||
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
||||
(plus:VDQHS
|
||||
(mult:VDQHS
|
||||
(vec_duplicate:VDQHS
|
||||
(vec_select:<VEL>
|
||||
(match_operand:VDQHS 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQHS 3 "register_operand" "w"))
|
||||
(match_operand:VDQHS 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_mla")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
|
||||
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
||||
(plus:VDQHS
|
||||
(mult:VDQHS
|
||||
(vec_duplicate:VDQHS
|
||||
(vec_select:<VEL>
|
||||
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQHS 3 "register_operand" "w"))
|
||||
(match_operand:VDQHS 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_mla")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_mls<mode>"
|
||||
[(set (match_operand:VQ_S 0 "register_operand" "=w")
|
||||
(minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0")
|
||||
@ -1081,6 +1113,38 @@
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_mls_elt<mode>"
|
||||
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
||||
(minus:VDQHS
|
||||
(match_operand:VDQHS 4 "register_operand" "0")
|
||||
(mult:VDQHS
|
||||
(vec_duplicate:VDQHS
|
||||
(vec_select:<VEL>
|
||||
(match_operand:VDQHS 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQHS 3 "register_operand" "w"))))]
|
||||
"TARGET_SIMD"
|
||||
"mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_mla")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
|
||||
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
||||
(minus:VDQHS
|
||||
(match_operand:VDQHS 4 "register_operand" "0")
|
||||
(mult:VDQHS
|
||||
(vec_duplicate:VDQHS
|
||||
(vec_select:<VEL>
|
||||
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQHS 3 "register_operand" "w"))))]
|
||||
"TARGET_SIMD"
|
||||
"mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_mla")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
;; Max/Min operations.
|
||||
(define_insn "<su><maxmin><mode>3"
|
||||
[(set (match_operand:VQ_S 0 "register_operand" "=w")
|
||||
@ -1483,6 +1547,137 @@
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fma4_elt<mode>"
|
||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
||||
(fma:VDQF
|
||||
(vec_duplicate:VDQF
|
||||
(vec_select:<VEL>
|
||||
(match_operand:VDQF 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQF 3 "register_operand" "w")
|
||||
(match_operand:VDQF 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
|
||||
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
||||
(fma:VDQSF
|
||||
(vec_duplicate:VDQSF
|
||||
(vec_select:<VEL>
|
||||
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQSF 3 "register_operand" "w")
|
||||
(match_operand:VDQSF 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fma4_elt_to_128df"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=w")
|
||||
(fma:V2DF
|
||||
(vec_duplicate:V2DF
|
||||
(match_operand:DF 1 "register_operand" "w"))
|
||||
(match_operand:V2DF 2 "register_operand" "w")
|
||||
(match_operand:V2DF 3 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmla\\t%0.2d, %2.2d, %1.2d[0]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "V2DF")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fma4_elt_to_64v2df"
|
||||
[(set (match_operand:DF 0 "register_operand" "=w")
|
||||
(fma:DF
|
||||
(vec_select:DF
|
||||
(match_operand:V2DF 1 "register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")]))
|
||||
(match_operand:DF 3 "register_operand" "w")
|
||||
(match_operand:DF 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmla\\t%0.2d, %3.2d, %1.2d[%2]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "V2DF")]
|
||||
)
|
||||
|
||||
(define_insn "fnma<mode>4"
|
||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
||||
(fma:VDQF
|
||||
(match_operand:VDQF 1 "register_operand" "w")
|
||||
(neg:VDQF
|
||||
(match_operand:VDQF 2 "register_operand" "w"))
|
||||
(match_operand:VDQF 3 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||
[(set_attr "simd_type" "simd_fmla")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fnma4_elt<mode>"
|
||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
||||
(fma:VDQF
|
||||
(neg:VDQF
|
||||
(match_operand:VDQF 3 "register_operand" "w"))
|
||||
(vec_duplicate:VDQF
|
||||
(vec_select:<VEL>
|
||||
(match_operand:VDQF 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQF 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
|
||||
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
||||
(fma:VDQSF
|
||||
(neg:VDQSF
|
||||
(match_operand:VDQSF 3 "register_operand" "w"))
|
||||
(vec_duplicate:VDQSF
|
||||
(vec_select:<VEL>
|
||||
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
||||
(match_operand:VDQSF 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fnma4_elt_to_128df"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=w")
|
||||
(fma:V2DF
|
||||
(neg:V2DF
|
||||
(match_operand:V2DF 2 "register_operand" "w"))
|
||||
(vec_duplicate:V2DF
|
||||
(match_operand:DF 1 "register_operand" "w"))
|
||||
(match_operand:V2DF 3 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmls\\t%0.2d, %2.2d, %1.2d[0]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "V2DF")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_fnma4_elt_to_64v2df"
|
||||
[(set (match_operand:DF 0 "register_operand" "=w")
|
||||
(fma:DF
|
||||
(vec_select:DF
|
||||
(match_operand:V2DF 1 "register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand")]))
|
||||
(neg:DF
|
||||
(match_operand:DF 3 "register_operand" "w"))
|
||||
(match_operand:DF 4 "register_operand" "0")))]
|
||||
"TARGET_SIMD"
|
||||
"fmls\\t%0.2d, %3.2d, %1.2d[%2]"
|
||||
[(set_attr "simd_type" "simd_fmla_elt")
|
||||
(set_attr "simd_mode" "V2DF")]
|
||||
)
|
||||
|
||||
;; Vector versions of the floating-point frint patterns.
|
||||
;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
|
||||
(define_insn "<frint_pattern><mode>2"
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -89,6 +89,9 @@
|
||||
;; Vector Float modes.
|
||||
(define_mode_iterator VDQF [V2SF V4SF V2DF])
|
||||
|
||||
;; Vector single Float modes.
|
||||
(define_mode_iterator VDQSF [V2SF V4SF])
|
||||
|
||||
;; Modes suitable to use as the return type of a vcond expression.
|
||||
(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
2013-09-16 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/fmla_intrinsic_1.c: New.
|
||||
* gcc.target/aarch64/mla_intrinsic_1.c: Likewise.
|
||||
* gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
|
||||
* gcc.target/aarch64/mls_intrinsic_1.c: Likewise.
|
||||
|
||||
2013-09-16 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/mul_intrinsic_1.c: New.
|
||||
|
116
gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
Normal file
116
gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
Normal file
@ -0,0 +1,116 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#define DELTA 0.0001
|
||||
|
||||
extern double fabs (double);
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
#define TEST_VMLA(q1, q2, size, in1_lanes, in2_lanes) \
|
||||
static void \
|
||||
test_vfma##q1##_lane##q2##_f##size (float##size##_t * res, \
|
||||
const float##size##_t *in1, \
|
||||
const float##size##_t *in2) \
|
||||
{ \
|
||||
float##size##x##in1_lanes##_t a = vld1##q1##_f##size (res); \
|
||||
float##size##x##in1_lanes##_t b = vld1##q1##_f##size (in1); \
|
||||
float##size##x##in2_lanes##_t c; \
|
||||
if (in2_lanes > 1) \
|
||||
{ \
|
||||
c = vld1##q2##_f##size (in2); \
|
||||
a = vfma##q1##_lane##q2##_f##size (a, b, c, 1); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
c = vld1##q2##_f##size (in2 + 1); \
|
||||
a = vfma##q1##_lane##q2##_f##size (a, b, c, 0); \
|
||||
} \
|
||||
vst1##q1##_f##size (res, a); \
|
||||
}
|
||||
|
||||
#define BUILD_VARS(width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLA ( , , width, n_half_lanes, n_half_lanes) \
|
||||
TEST_VMLA (q, , width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLA ( , q, width, n_half_lanes, n_lanes) \
|
||||
TEST_VMLA (q, q, width, n_lanes, n_lanes) \
|
||||
|
||||
BUILD_VARS (32, 4, 2)
|
||||
BUILD_VARS (64, 2, 1)
|
||||
|
||||
#define POOL2 {0.0, 1.0}
|
||||
#define POOL4 {0.0, 1.0, 2.0, 3.0}
|
||||
#define EMPTY2 {0.0, 0.0}
|
||||
#define EMPTY4 {0.0, 0.0, 0.0, 0.0}
|
||||
|
||||
#define BUILD_TEST(size, lanes) \
|
||||
static void \
|
||||
test_f##size (void) \
|
||||
{ \
|
||||
int i; \
|
||||
float##size##_t pool[lanes] = POOL##lanes; \
|
||||
float##size##_t res[lanes] = EMPTY##lanes; \
|
||||
float##size##_t res2[lanes] = EMPTY##lanes; \
|
||||
float##size##_t res3[lanes] = EMPTY##lanes; \
|
||||
float##size##_t res4[lanes] = EMPTY##lanes; \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vfma_lane_f##size (res, pool, pool); \
|
||||
for (i = 0; i < lanes / 2; i++) \
|
||||
if (fabs (res[i] - pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vfmaq_lane_f##size (res2, pool, pool); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (fabs (res2[i] - pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vfma_laneq_f##size (res3, pool, pool); \
|
||||
for (i = 0; i < lanes / 2; i++) \
|
||||
if (fabs (res3[i] - pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vfmaq_laneq_f##size (res4, pool, pool); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (fabs (res4[i] - pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
BUILD_TEST (32, 4)
|
||||
BUILD_TEST (64, 2)
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
test_f32 ();
|
||||
test_f64 ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* vfma_laneq_f32.
|
||||
vfma_lane_f32. */
|
||||
/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s\\\[\[0-9\]+\\\]" 2 } } */
|
||||
|
||||
/* vfmaq_lane_f32.
|
||||
vfmaq_laneq_f32. */
|
||||
/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[\[0-9\]+\\\]" 2 } } */
|
||||
|
||||
/* vfma_lane_f64. */
|
||||
/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 } } */
|
||||
|
||||
/* vfmaq_lane_f64.
|
||||
vfma_laneq_f64.
|
||||
vfmaq_laneq_f64. */
|
||||
/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */
|
||||
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
117
gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
Normal file
117
gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
Normal file
@ -0,0 +1,117 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#define DELTA 0.0001
|
||||
|
||||
extern double fabs (double);
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
#define TEST_VMLS(q1, q2, size, in1_lanes, in2_lanes) \
|
||||
static void \
|
||||
test_vfms##q1##_lane##q2##_f##size (float##size##_t * res, \
|
||||
const float##size##_t *in1, \
|
||||
const float##size##_t *in2) \
|
||||
{ \
|
||||
float##size##x##in1_lanes##_t a = vld1##q1##_f##size (res); \
|
||||
float##size##x##in1_lanes##_t b = vld1##q1##_f##size (in1); \
|
||||
float##size##x##in2_lanes##_t c; \
|
||||
if (in2_lanes > 1) \
|
||||
{ \
|
||||
c = vld1##q2##_f##size (in2); \
|
||||
a = vfms##q1##_lane##q2##_f##size (a, b, c, 1); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
c = vld1##q2##_f##size (in2 + 1); \
|
||||
a = vfms##q1##_lane##q2##_f##size (a, b, c, 0); \
|
||||
} \
|
||||
vst1##q1##_f##size (res, a); \
|
||||
}
|
||||
|
||||
#define BUILD_VARS(width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLS ( , , width, n_half_lanes, n_half_lanes) \
|
||||
TEST_VMLS (q, , width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLS ( , q, width, n_half_lanes, n_lanes) \
|
||||
TEST_VMLS (q, q, width, n_lanes, n_lanes) \
|
||||
|
||||
BUILD_VARS (32, 4, 2)
|
||||
BUILD_VARS (64, 2, 1)
|
||||
|
||||
#define POOL2 {0.0, 1.0}
|
||||
#define POOL4 {0.0, 1.0, 2.0, 3.0}
|
||||
#define EMPTY2 {0.0, 0.0}
|
||||
#define EMPTY4 {0.0, 0.0, 0.0, 0.0}
|
||||
|
||||
#define BUILD_TEST(size, lanes) \
|
||||
static void \
|
||||
test_f##size (void) \
|
||||
{ \
|
||||
int i; \
|
||||
float##size##_t pool[lanes] = POOL##lanes; \
|
||||
float##size##_t res[lanes] = EMPTY##lanes; \
|
||||
float##size##_t res2[lanes] = EMPTY##lanes; \
|
||||
float##size##_t res3[lanes] = EMPTY##lanes; \
|
||||
float##size##_t res4[lanes] = EMPTY##lanes; \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vfms_lane_f##size (res, pool, pool); \
|
||||
asm volatile ("" : :"Q" (res) : "memory"); \
|
||||
for (i = 0; i < lanes / 2; i++) \
|
||||
if (fabs (res[i] + pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
test_vfmsq_lane_f##size (res2, pool, pool); \
|
||||
asm volatile ("" : :"Q" (res2) : "memory"); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (fabs (res2[i] + pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
test_vfms_laneq_f##size (res3, pool, pool); \
|
||||
asm volatile ("" : :"Q" (res3) : "memory"); \
|
||||
for (i = 0; i < lanes / 2; i++) \
|
||||
if (fabs (res3[i] + pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
test_vfmsq_laneq_f##size (res4, pool, pool); \
|
||||
asm volatile ("" : :"Q" (res4) : "memory"); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (fabs (res4[i] + pool[i]) > DELTA) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
BUILD_TEST (32, 4)
|
||||
BUILD_TEST (64, 2)
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
test_f32 ();
|
||||
test_f64 ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* vfms_laneq_f32.
|
||||
vfms_lane_f32. */
|
||||
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s\\\[\[0-9\]+\\\]" 2 } } */
|
||||
|
||||
/* vfmsq_lane_f32.
|
||||
vfmsq_laneq_f32. */
|
||||
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[\[0-9\]+\\\]" 2 } } */
|
||||
|
||||
/* vfms_lane_f64. */
|
||||
/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 } } */
|
||||
|
||||
/* vfmsq_lane_f64.
|
||||
vfms_laneq_f64.
|
||||
vfmsq_laneq_f64. */
|
||||
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */
|
||||
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
84
gcc/testsuite/gcc.target/aarch64/mla_intrinsic_1.c
Normal file
84
gcc/testsuite/gcc.target/aarch64/mla_intrinsic_1.c
Normal file
@ -0,0 +1,84 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
#define MAPs(size, xx) int##size##xx##_t
|
||||
#define MAPu(size, xx) uint##size##xx##_t
|
||||
|
||||
|
||||
#define TEST_VMLA(q, su, size, in1_lanes, in2_lanes) \
|
||||
static void \
|
||||
test_vmlaq_lane##q##_##su##size (MAP##su (size, ) * res, \
|
||||
const MAP##su(size, ) *in1, \
|
||||
const MAP##su(size, ) *in2) \
|
||||
{ \
|
||||
MAP##su (size, x##in1_lanes) a = vld1q_##su##size (res); \
|
||||
MAP##su (size, x##in1_lanes) b = vld1q_##su##size (in1); \
|
||||
MAP##su (size, x##in2_lanes) c = vld1##q##_##su##size (in2); \
|
||||
a = vmlaq_lane##q##_##su##size (a, b, c, 1); \
|
||||
vst1q_##su##size (res, a); \
|
||||
}
|
||||
|
||||
#define BUILD_VARS(width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLA (, s, width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLA (q, s, width, n_lanes, n_lanes) \
|
||||
TEST_VMLA (, u, width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLA (q, u, width, n_lanes, n_lanes) \
|
||||
|
||||
BUILD_VARS (32, 4, 2)
|
||||
BUILD_VARS (16, 8, 4)
|
||||
|
||||
#define POOL4 {0, 1, 2, 3}
|
||||
#define POOL8 {0, 1, 2, 3, 4, 5, 6, 7}
|
||||
#define EMPTY4 {0, 0, 0, 0}
|
||||
#define EMPTY8 {0, 0, 0, 0, 0, 0, 0, 0}
|
||||
|
||||
#define BUILD_TEST(su, size, lanes) \
|
||||
static void \
|
||||
test_##su##size (void) \
|
||||
{ \
|
||||
int i; \
|
||||
MAP##su (size,) pool[lanes] = POOL##lanes; \
|
||||
MAP##su (size,) res[lanes] = EMPTY##lanes; \
|
||||
MAP##su (size,) res2[lanes] = EMPTY##lanes; \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vmlaq_lane_##su##size (res, pool, pool); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (res[i] != pool[i]) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vmlaq_laneq_##su##size (res2, pool, pool); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (res2[i] != pool[i]) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
#undef BUILD_VARS
|
||||
#define BUILD_VARS(size, lanes) \
|
||||
BUILD_TEST (s, size, lanes) \
|
||||
BUILD_TEST (u, size, lanes)
|
||||
|
||||
BUILD_VARS (32, 4)
|
||||
BUILD_VARS (16, 8)
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
test_s32 ();
|
||||
test_u32 ();
|
||||
test_s16 ();
|
||||
test_u16 ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "mla\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[\[0-9\]+\\\]" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "mla\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h\\\[\[0-9\]+\\\]" 4 } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
89
gcc/testsuite/gcc.target/aarch64/mls_intrinsic_1.c
Normal file
89
gcc/testsuite/gcc.target/aarch64/mls_intrinsic_1.c
Normal file
@ -0,0 +1,89 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
#define MAPs(size, xx) int##size##xx##_t
|
||||
#define MAPu(size, xx) uint##size##xx##_t
|
||||
|
||||
|
||||
#define TEST_VMLS(q, su, size, in1_lanes, in2_lanes) \
|
||||
static void \
|
||||
test_vmlsq_lane##q##_##su##size (MAP##su (size, ) * res, \
|
||||
const MAP##su(size, ) *in1, \
|
||||
const MAP##su(size, ) *in2) \
|
||||
{ \
|
||||
MAP##su (size, x##in1_lanes) a = vld1q_##su##size (res); \
|
||||
MAP##su (size, x##in1_lanes) b = vld1q_##su##size (in1); \
|
||||
MAP##su (size, x##in2_lanes) c = vld1##q##_##su##size (in2); \
|
||||
a = vmlsq_lane##q##_##su##size (a, b, c, 1); \
|
||||
vst1q_##su##size (res, a); \
|
||||
}
|
||||
|
||||
#define BUILD_VARS(width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLS (, s, width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLS (q, s, width, n_lanes, n_lanes) \
|
||||
TEST_VMLS (, u, width, n_lanes, n_half_lanes) \
|
||||
TEST_VMLS (q, u, width, n_lanes, n_lanes) \
|
||||
|
||||
BUILD_VARS (32, 4, 2)
|
||||
BUILD_VARS (16, 8, 4)
|
||||
|
||||
#define MAP_OPs +
|
||||
#define MAP_OPu -
|
||||
|
||||
#define POOL4 {0, 1, 2, 3}
|
||||
#define POOL8 {0, 1, 2, 3, 4, 5, 6, 7}
|
||||
#define EMPTY4s {0, 0, 0, 0}
|
||||
#define EMPTY8s {0, 0, 0, 0, 0, 0, 0, 0}
|
||||
#define EMPTY4u {0, 2, 4, 6}
|
||||
#define EMPTY8u {0, 2, 4, 6, 8, 10, 12, 14}
|
||||
|
||||
#define BUILD_TEST(su, size, lanes) \
|
||||
static void \
|
||||
test_##su##size (void) \
|
||||
{ \
|
||||
int i; \
|
||||
MAP##su (size,) pool[lanes] = POOL##lanes; \
|
||||
MAP##su (size,) res[lanes] = EMPTY##lanes##su; \
|
||||
MAP##su (size,) res2[lanes] = EMPTY##lanes##su; \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vmlsq_lane_##su##size (res, pool, pool); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (res[i] MAP_OP##su pool[i] != 0) \
|
||||
abort (); \
|
||||
\
|
||||
/* Forecfully avoid optimization. */ \
|
||||
asm volatile ("" : : : "memory"); \
|
||||
test_vmlsq_laneq_##su##size (res2, pool, pool); \
|
||||
for (i = 0; i < lanes; i++) \
|
||||
if (res2[i] MAP_OP##su pool[i] != 0) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
#undef BUILD_VARS
|
||||
#define BUILD_VARS(size, lanes) \
|
||||
BUILD_TEST (s, size, lanes) \
|
||||
BUILD_TEST (u, size, lanes)
|
||||
|
||||
BUILD_VARS (32, 4)
|
||||
BUILD_VARS (16, 8)
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
test_s32 ();
|
||||
test_u32 ();
|
||||
test_s16 ();
|
||||
test_u16 ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "mls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[\[0-9\]+\\\]" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "mls\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h\\\[\[0-9\]+\\\]" 4 } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
Loading…
Reference in New Issue
Block a user