[PATCH AArch64] Add more AArch64 NEON intrinsics

Add vmaxnm_f64, vminnm_f64, vmax_f64, vmin_f64.

Committed on behalf of Tamar Christina <tamar.christina@arm.com> .

gcc/

	* config/aarch64/aarch64-simd-builtins.def
	(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
	(__builtin_aarch64_fmaxdf): Likewise.
	(__builtin_aarch64_smin_nandf): Likewise.
	(__builtin_aarch64_smax_nandf): Likewise.
	* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
	* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
	(<fmaxmin><mode>3): ...this.
	* config/aarch64/arm_neon.h (vmaxnm_f64): New.
	(vminnm_f64): Likewise.
	(vmin_f64): Likewise.
	(vmax_f64): Likewise.
	* config/aarch64/iterators.md (FMAXMIN): Merge with...
	(FMAXMIN_UNS): ...this.
	(fmaxmin): Merged with
	(fmaxmin_op): ...this...
	(maxmin_uns_op): ...in to this.

gcc/testsuite/

	* gcc.target/aarch64/vminmaxnm.c: New.
	* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Added float64x1_t
	tests.

From-SVN: r238977
This commit is contained in:
Tamar Christina 2016-08-02 09:25:19 +00:00 committed by James Greenhalgh
parent 0b953808f4
commit 1efafef383
9 changed files with 166 additions and 39 deletions

View File

@ -1,3 +1,23 @@
2016-08-02 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
(__builtin_aarch64_fmaxdf): Likewise.
(__builtin_aarch64_smin_nandf): Likewise.
(__builtin_aarch64_smax_nandf): Likewise.
* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
(<fmaxmin><mode>3): ...this.
* config/aarch64/arm_neon.h (vmaxnm_f64): New.
(vminnm_f64): Likewise.
(vmin_f64): Likewise.
(vmax_f64): Likewise.
* config/aarch64/iterators.md (FMAXMIN): Merge with...
(FMAXMIN_UNS): ...this.
(fmaxmin): Merged with
(fmaxmin_op): ...this...
(maxmin_uns_op): ...in to this.
2016-08-01 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):

View File

@ -241,19 +241,19 @@
BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3.
/* Implemented by <maxmin_uns><mode>3.
smax variants map to fmaxnm,
smax_nan variants map to fmax. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
BUILTIN_VHSDF (BINOP, smax_nan, 3)
BUILTIN_VHSDF (BINOP, smin_nan, 3)
BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
/* Implemented by <fmaxmin><mode>3. */
BUILTIN_VHSDF (BINOP, fmax, 3)
BUILTIN_VHSDF (BINOP, fmin, 3)
/* Implemented by <maxmin_uns><mode>3. */
BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
@ -549,8 +549,4 @@
BUILTIN_GPI (UNOP, fix_truncdf, 2)
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
/* Implemented by <fmaxmin><mode>3. */
VAR1 (BINOP, fmax, 3, hf)
VAR1 (BINOP, fmin, 3, hf)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)

View File

@ -2038,6 +2038,9 @@
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
@ -2048,17 +2051,6 @@
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
FMAXMIN))]
"TARGET_SIMD"
"<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; 'across lanes' add.
(define_expand "reduc_plus_scal_<mode>"

View File

@ -4841,14 +4841,16 @@
[(set_attr "type" "f_minmax<s>")]
)
;; Scalar forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF_F16 2 "register_operand" "w")]
FMAXMIN))]
FMAXMIN_UNS))]
"TARGET_FLOAT"
"<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
"<maxmin_uns_op>\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "f_minmax<stype>")]
)

View File

@ -17201,6 +17201,14 @@ vmax_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_smax_nanv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmax_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8 (int8x8_t __a, int8x8_t __b)
{
@ -17692,6 +17700,14 @@ vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_fmaxv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmaxnm_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
@ -17824,6 +17840,14 @@ vmin_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_smin_nanv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmin_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
@ -17922,6 +17946,14 @@ vminnm_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_fminv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vminnm_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_fmind (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{

View File

@ -1016,9 +1016,8 @@
(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
(define_int_iterator FMAXMIN [UNSPEC_FMAXNM UNSPEC_FMINNM])
(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
UNSPEC_FMAXNM UNSPEC_FMINNM])
(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
@ -1102,7 +1101,9 @@
(UNSPEC_FMAXV "smax_nan")
(UNSPEC_FMIN "smin_nan")
(UNSPEC_FMINNMV "smin")
(UNSPEC_FMINV "smin_nan")])
(UNSPEC_FMINV "smin_nan")
(UNSPEC_FMAXNM "fmax")
(UNSPEC_FMINNM "fmin")])
(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
@ -1113,13 +1114,9 @@
(UNSPEC_FMAXV "fmax")
(UNSPEC_FMIN "fmin")
(UNSPEC_FMINNMV "fminnm")
(UNSPEC_FMINV "fmin")])
(define_int_attr fmaxmin [(UNSPEC_FMAXNM "fmax")
(UNSPEC_FMINNM "fmin")])
(define_int_attr fmaxmin_op [(UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMINNM "fminnm")])
(UNSPEC_FMINV "fmin")
(UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMINNM "fminnm")])
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")

View File

@ -1,3 +1,9 @@
2016-08-02 Tamar Christina <tamar.christina@arm.com>
* gcc.target/aarch64/vminmaxnm.c: New.
* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Add float64x1_t
tests.
2016-08-01 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-extract-5.c: New tests to test

View File

@ -1,4 +1,4 @@
/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */
/* Test the `v[min|max]{nm}{q}_f*' AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-O2" } */
@ -18,6 +18,7 @@ extern void abort ();
int
main (int argc, char **argv)
{
/* v{min|max}nm_f32 normal. */
float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
@ -28,6 +29,7 @@ main (int argc, char **argv)
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
/* v{min|max}nm_f32 NaN. */
f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
f32x2_input2 = vdup_n_f32 (1.0);
f32x2_exp_minnm = vdup_n_f32 (1.0);
@ -38,6 +40,7 @@ main (int argc, char **argv)
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
/* v{min|max}nmq_f32 normal. */
float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
@ -48,6 +51,7 @@ main (int argc, char **argv)
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
/* v{min|max}nmq_f32 NaN. */
f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
f32x4_input2 = vdupq_n_f32 (-1.0);
f32x4_exp_minnm = vdupq_n_f32 (-1.0);
@ -58,16 +62,57 @@ main (int argc, char **argv)
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
/* v{min|max}nm_f64 normal. */
float64x1_t f64x1_input1 = vdup_n_f64 (1.23);
float64x1_t f64x1_input2 = vdup_n_f64 (4.56);
float64x1_t f64x1_exp_minnm = vdup_n_f64 (1.23);
float64x1_t f64x1_exp_maxnm = vdup_n_f64 (4.56);
float64x1_t f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2);
float64x1_t f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
/* v{min|max}_f64 normal. */
float64x1_t f64x1_exp_min = vdup_n_f64 (1.23);
float64x1_t f64x1_exp_max = vdup_n_f64 (4.56);
float64x1_t f64x1_ret_min = vmin_f64 (f64x1_input1, f64x1_input2);
float64x1_t f64x1_ret_max = vmax_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_min, f64x1_exp_min);
CHECK (uint64_t, 1, f64x1_ret_max, f64x1_exp_max);
/* v{min|max}nmq_f64 normal. */
float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
/* v{min|max}nm_f64 NaN. */
f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
f64x1_input2 = vdup_n_f64 (1.0);
f64x1_exp_minnm = vdup_n_f64 (1.0);
f64x1_exp_maxnm = vdup_n_f64 (1.0);
f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2);
f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
/* v{min|max}_f64 NaN. */
f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
f64x1_input2 = vdup_n_f64 (1.0);
f64x1_exp_minnm = vdup_n_f64 (-__builtin_nanf (""));
f64x1_exp_maxnm = vdup_n_f64 (-__builtin_nanf (""));
f64x1_ret_minnm = vmin_f64 (f64x1_input1, f64x1_input2);
f64x1_ret_maxnm = vmax_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
/* v{min|max}nmq_f64 NaN. */
f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
f64x2_input2 = vdupq_n_f64 (1.0);
f64x2_exp_minnm = vdupq_n_f64 (1.0);

View File

@ -0,0 +1,37 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
#include "arm_neon.h"
/* For each of these intrinsics, we map directly to an unspec in RTL.
We're just using the argument directly and returning the result, so we
can precisely specify the exact instruction pattern and register
allocations we expect. */
float64x1_t
test_vmaxnm_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fmaxnm\td0, d0, d1" 1 } } */
return vmaxnm_f64 (a, b);
}
float64x1_t
test_vminnm_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fminnm\td0, d0, d1" 1 } } */
return vminnm_f64 (a, b);
}
float64x1_t
test_vmax_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fmax\td0, d0, d1" 1 } } */
return vmax_f64 (a, b);
}
float64x1_t
test_vmin_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fmin\td0, d0, d1" 1 } } */
return vmin_f64 (a, b);
}