[PATCH AArch64] Add more AArch64 NEON intrinsics
Add vmaxnm_f64, vminnm_f64, vmax_f64, vmin_f64. Committed on behalf of Tamar Christina <tamar.christina@arm.com> . gcc/ * config/aarch64/aarch64-simd-builtins.def (__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF. (__builtin_aarch64_fmaxdf): Likewise. (__builtin_aarch64_smin_nandf): Likewise. (__builtin_aarch64_smax_nandf): Likewise. * config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove. * config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to... (<fmaxmin><mode>3): ...this. * config/aarch64/arm_neon.h (vmaxnm_f64): New. (vminnm_f64): Likewise. (vmin_f64): Likewise. (vmax_f64): Likewise. * config/aarch64/iterators.md (FMAXMIN): Merge with... (FMAXMIN_UNS): ...this. (fmaxmin): Merged with (fmaxmin_op): ...this... (maxmin_uns_op): ...in to this. gcc/testsuite/ * gcc.target/aarch64/vminmaxnm.c: New. * gcc.target/aarch64/simd/vminmaxnm_1.c (main): Added float64x1_t tests. From-SVN: r238977
This commit is contained in:
parent
0b953808f4
commit
1efafef383
@ -1,3 +1,23 @@
|
||||
2016-08-02 Tamar Christina <tamar.christina@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def
|
||||
(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
|
||||
(__builtin_aarch64_fmaxdf): Likewise.
|
||||
(__builtin_aarch64_smin_nandf): Likewise.
|
||||
(__builtin_aarch64_smax_nandf): Likewise.
|
||||
* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
|
||||
* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
|
||||
(<fmaxmin><mode>3): ...this.
|
||||
* config/aarch64/arm_neon.h (vmaxnm_f64): New.
|
||||
(vminnm_f64): Likewise.
|
||||
(vmin_f64): Likewise.
|
||||
(vmax_f64): Likewise.
|
||||
* config/aarch64/iterators.md (FMAXMIN): Merge with...
|
||||
(FMAXMIN_UNS): ...this.
|
||||
(fmaxmin): Merged with
|
||||
(fmaxmin_op): ...this...
|
||||
(maxmin_uns_op): ...in to this.
|
||||
|
||||
2016-08-01 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
|
||||
|
@ -241,19 +241,19 @@
|
||||
BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
|
||||
BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
|
||||
|
||||
/* Implemented by <maxmin><mode>3.
|
||||
/* Implemented by <maxmin_uns><mode>3.
|
||||
smax variants map to fmaxnm,
|
||||
smax_nan variants map to fmax. */
|
||||
BUILTIN_VDQ_BHSI (BINOP, smax, 3)
|
||||
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
|
||||
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
|
||||
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
|
||||
BUILTIN_VHSDF (BINOP, smax_nan, 3)
|
||||
BUILTIN_VHSDF (BINOP, smin_nan, 3)
|
||||
BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
|
||||
BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
|
||||
|
||||
/* Implemented by <fmaxmin><mode>3. */
|
||||
BUILTIN_VHSDF (BINOP, fmax, 3)
|
||||
BUILTIN_VHSDF (BINOP, fmin, 3)
|
||||
/* Implemented by <maxmin_uns><mode>3. */
|
||||
BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
|
||||
BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
|
||||
|
||||
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
|
||||
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
|
||||
@ -549,8 +549,4 @@
|
||||
BUILTIN_GPI (UNOP, fix_truncdf, 2)
|
||||
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
|
||||
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
|
||||
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
|
||||
|
||||
/* Implemented by <fmaxmin><mode>3. */
|
||||
VAR1 (BINOP, fmax, 3, hf)
|
||||
VAR1 (BINOP, fmin, 3, hf)
|
||||
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
|
@ -2038,6 +2038,9 @@
|
||||
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
||||
)
|
||||
|
||||
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
|
||||
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
|
||||
;; which implement the IEEE fmax ()/fmin () functions.
|
||||
(define_insn "<maxmin_uns><mode>3"
|
||||
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||
@ -2048,17 +2051,6 @@
|
||||
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
||||
)
|
||||
|
||||
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
|
||||
(define_insn "<fmaxmin><mode>3"
|
||||
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF 2 "register_operand" "w")]
|
||||
FMAXMIN))]
|
||||
"TARGET_SIMD"
|
||||
"<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
||||
)
|
||||
|
||||
;; 'across lanes' add.
|
||||
|
||||
(define_expand "reduc_plus_scal_<mode>"
|
||||
|
@ -4841,14 +4841,16 @@
|
||||
[(set_attr "type" "f_minmax<s>")]
|
||||
)
|
||||
|
||||
;; Scalar forms for the IEEE-754 fmax()/fmin() functions
|
||||
(define_insn "<fmaxmin><mode>3"
|
||||
;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
|
||||
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
|
||||
;; which implement the IEEE fmax ()/fmin () functions.
|
||||
(define_insn "<maxmin_uns><mode>3"
|
||||
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
||||
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
|
||||
(match_operand:GPF_F16 2 "register_operand" "w")]
|
||||
FMAXMIN))]
|
||||
FMAXMIN_UNS))]
|
||||
"TARGET_FLOAT"
|
||||
"<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
|
||||
"<maxmin_uns_op>\\t%<s>0, %<s>1, %<s>2"
|
||||
[(set_attr "type" "f_minmax<stype>")]
|
||||
)
|
||||
|
||||
|
@ -17201,6 +17201,14 @@ vmax_f32 (float32x2_t __a, float32x2_t __b)
|
||||
return __builtin_aarch64_smax_nanv2sf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vmax_f64 (float64x1_t __a, float64x1_t __b)
|
||||
{
|
||||
return (float64x1_t)
|
||||
{ __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
|
||||
vget_lane_f64 (__b, 0)) };
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vmax_s8 (int8x8_t __a, int8x8_t __b)
|
||||
{
|
||||
@ -17692,6 +17700,14 @@ vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
|
||||
return __builtin_aarch64_fmaxv2sf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vmaxnm_f64 (float64x1_t __a, float64x1_t __b)
|
||||
{
|
||||
return (float64x1_t)
|
||||
{ __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0),
|
||||
vget_lane_f64 (__b, 0)) };
|
||||
}
|
||||
|
||||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||||
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
|
||||
{
|
||||
@ -17824,6 +17840,14 @@ vmin_f32 (float32x2_t __a, float32x2_t __b)
|
||||
return __builtin_aarch64_smin_nanv2sf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vmin_f64 (float64x1_t __a, float64x1_t __b)
|
||||
{
|
||||
return (float64x1_t)
|
||||
{ __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
|
||||
vget_lane_f64 (__b, 0)) };
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vmin_s8 (int8x8_t __a, int8x8_t __b)
|
||||
{
|
||||
@ -17922,6 +17946,14 @@ vminnm_f32 (float32x2_t __a, float32x2_t __b)
|
||||
return __builtin_aarch64_fminv2sf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vminnm_f64 (float64x1_t __a, float64x1_t __b)
|
||||
{
|
||||
return (float64x1_t)
|
||||
{ __builtin_aarch64_fmind (vget_lane_f64 (__a, 0),
|
||||
vget_lane_f64 (__b, 0)) };
|
||||
}
|
||||
|
||||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||||
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
|
||||
{
|
||||
|
@ -1016,9 +1016,8 @@
|
||||
(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
|
||||
UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
|
||||
|
||||
(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
|
||||
|
||||
(define_int_iterator FMAXMIN [UNSPEC_FMAXNM UNSPEC_FMINNM])
|
||||
(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
|
||||
UNSPEC_FMAXNM UNSPEC_FMINNM])
|
||||
|
||||
(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
|
||||
|
||||
@ -1102,7 +1101,9 @@
|
||||
(UNSPEC_FMAXV "smax_nan")
|
||||
(UNSPEC_FMIN "smin_nan")
|
||||
(UNSPEC_FMINNMV "smin")
|
||||
(UNSPEC_FMINV "smin_nan")])
|
||||
(UNSPEC_FMINV "smin_nan")
|
||||
(UNSPEC_FMAXNM "fmax")
|
||||
(UNSPEC_FMINNM "fmin")])
|
||||
|
||||
(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
|
||||
(UNSPEC_UMINV "umin")
|
||||
@ -1113,13 +1114,9 @@
|
||||
(UNSPEC_FMAXV "fmax")
|
||||
(UNSPEC_FMIN "fmin")
|
||||
(UNSPEC_FMINNMV "fminnm")
|
||||
(UNSPEC_FMINV "fmin")])
|
||||
|
||||
(define_int_attr fmaxmin [(UNSPEC_FMAXNM "fmax")
|
||||
(UNSPEC_FMINNM "fmin")])
|
||||
|
||||
(define_int_attr fmaxmin_op [(UNSPEC_FMAXNM "fmaxnm")
|
||||
(UNSPEC_FMINNM "fminnm")])
|
||||
(UNSPEC_FMINV "fmin")
|
||||
(UNSPEC_FMAXNM "fmaxnm")
|
||||
(UNSPEC_FMINNM "fminnm")])
|
||||
|
||||
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
|
||||
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
|
||||
|
@ -1,3 +1,9 @@
|
||||
2016-08-02 Tamar Christina <tamar.christina@arm.com>
|
||||
|
||||
* gcc.target/aarch64/vminmaxnm.c: New.
|
||||
* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Add float64x1_t
|
||||
tests.
|
||||
|
||||
2016-08-01 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/vec-extract-5.c: New tests to test
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */
|
||||
/* Test the `v[min|max]{nm}{q}_f*' AArch64 SIMD intrinsic. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2" } */
|
||||
@ -18,6 +18,7 @@ extern void abort ();
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
/* v{min|max}nm_f32 normal. */
|
||||
float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
|
||||
float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
|
||||
float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
|
||||
@ -28,6 +29,7 @@ main (int argc, char **argv)
|
||||
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
|
||||
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
|
||||
|
||||
/* v{min|max}nm_f32 NaN. */
|
||||
f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
|
||||
f32x2_input2 = vdup_n_f32 (1.0);
|
||||
f32x2_exp_minnm = vdup_n_f32 (1.0);
|
||||
@ -38,6 +40,7 @@ main (int argc, char **argv)
|
||||
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
|
||||
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
|
||||
|
||||
/* v{min|max}nmq_f32 normal. */
|
||||
float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
|
||||
float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
|
||||
float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
|
||||
@ -48,6 +51,7 @@ main (int argc, char **argv)
|
||||
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
|
||||
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
|
||||
|
||||
/* v{min|max}nmq_f32 NaN. */
|
||||
f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
|
||||
f32x4_input2 = vdupq_n_f32 (-1.0);
|
||||
f32x4_exp_minnm = vdupq_n_f32 (-1.0);
|
||||
@ -58,16 +62,57 @@ main (int argc, char **argv)
|
||||
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
|
||||
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
|
||||
|
||||
/* v{min|max}nm_f64 normal. */
|
||||
float64x1_t f64x1_input1 = vdup_n_f64 (1.23);
|
||||
float64x1_t f64x1_input2 = vdup_n_f64 (4.56);
|
||||
float64x1_t f64x1_exp_minnm = vdup_n_f64 (1.23);
|
||||
float64x1_t f64x1_exp_maxnm = vdup_n_f64 (4.56);
|
||||
float64x1_t f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2);
|
||||
float64x1_t f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2);
|
||||
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
|
||||
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
|
||||
|
||||
/* v{min|max}_f64 normal. */
|
||||
float64x1_t f64x1_exp_min = vdup_n_f64 (1.23);
|
||||
float64x1_t f64x1_exp_max = vdup_n_f64 (4.56);
|
||||
float64x1_t f64x1_ret_min = vmin_f64 (f64x1_input1, f64x1_input2);
|
||||
float64x1_t f64x1_ret_max = vmax_f64 (f64x1_input1, f64x1_input2);
|
||||
CHECK (uint64_t, 1, f64x1_ret_min, f64x1_exp_min);
|
||||
CHECK (uint64_t, 1, f64x1_ret_max, f64x1_exp_max);
|
||||
|
||||
/* v{min|max}nmq_f64 normal. */
|
||||
float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
|
||||
float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
|
||||
float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
|
||||
float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
|
||||
float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
|
||||
float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
|
||||
|
||||
CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
|
||||
CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
|
||||
|
||||
/* v{min|max}nm_f64 NaN. */
|
||||
f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
|
||||
f64x1_input2 = vdup_n_f64 (1.0);
|
||||
f64x1_exp_minnm = vdup_n_f64 (1.0);
|
||||
f64x1_exp_maxnm = vdup_n_f64 (1.0);
|
||||
f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2);
|
||||
f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2);
|
||||
|
||||
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
|
||||
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
|
||||
|
||||
/* v{min|max}_f64 NaN. */
|
||||
f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
|
||||
f64x1_input2 = vdup_n_f64 (1.0);
|
||||
f64x1_exp_minnm = vdup_n_f64 (-__builtin_nanf (""));
|
||||
f64x1_exp_maxnm = vdup_n_f64 (-__builtin_nanf (""));
|
||||
f64x1_ret_minnm = vmin_f64 (f64x1_input1, f64x1_input2);
|
||||
f64x1_ret_maxnm = vmax_f64 (f64x1_input1, f64x1_input2);
|
||||
|
||||
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
|
||||
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
|
||||
|
||||
/* v{min|max}nmq_f64 NaN. */
|
||||
f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
|
||||
f64x2_input2 = vdupq_n_f64 (1.0);
|
||||
f64x2_exp_minnm = vdupq_n_f64 (1.0);
|
||||
|
37
gcc/testsuite/gcc.target/aarch64/vminmaxnm.c
Normal file
37
gcc/testsuite/gcc.target/aarch64/vminmaxnm.c
Normal file
@ -0,0 +1,37 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
/* For each of these intrinsics, we map directly to an unspec in RTL.
|
||||
We're just using the argument directly and returning the result, so we
|
||||
can precisely specify the exact instruction pattern and register
|
||||
allocations we expect. */
|
||||
|
||||
float64x1_t
|
||||
test_vmaxnm_f64 (float64x1_t a, float64x1_t b)
|
||||
{
|
||||
/* { dg-final { scan-assembler-times "fmaxnm\td0, d0, d1" 1 } } */
|
||||
return vmaxnm_f64 (a, b);
|
||||
}
|
||||
|
||||
float64x1_t
|
||||
test_vminnm_f64 (float64x1_t a, float64x1_t b)
|
||||
{
|
||||
/* { dg-final { scan-assembler-times "fminnm\td0, d0, d1" 1 } } */
|
||||
return vminnm_f64 (a, b);
|
||||
}
|
||||
|
||||
float64x1_t
|
||||
test_vmax_f64 (float64x1_t a, float64x1_t b)
|
||||
{
|
||||
/* { dg-final { scan-assembler-times "fmax\td0, d0, d1" 1 } } */
|
||||
return vmax_f64 (a, b);
|
||||
}
|
||||
|
||||
float64x1_t
|
||||
test_vmin_f64 (float64x1_t a, float64x1_t b)
|
||||
{
|
||||
/* { dg-final { scan-assembler-times "fmin\td0, d0, d1" 1 } } */
|
||||
return vmin_f64 (a, b);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user