[PATCH 8/17][ARM] Add VFP FP16 arithmetic instructions.
gcc/ 2016-09-23 Matthew Wahab <matthew.wahab@arm.com> * config/arm/iterators.md (Code iterators): Fix some white-space in the comments. (GLTE): New. (ABSNEG): New (FCVT): Moved from vfp.md. (VCVT_HF_US_N): New. (VCVT_SI_US_N): New. (VCVT_HF_US): New. (VCVTH_US): New. (FP16_RND): New. (absneg_str): New. (FCVTI32typename): Moved from vfp.md. (sup): Add UNSPEC_VCVTA_S, UNSPEC_VCVTA_U, UNSPEC_VCVTM_S, UNSPEC_VCVTM_U, UNSPEC_VCVTN_S, UNSPEC_VCVTN_U, UNSPEC_VCVTP_S, UNSPEC_VCVTP_U, UNSPEC_VCVT_HF_S_N, UNSPEC_VCVT_HF_U_N, UNSPEC_VCVT_SI_S_N, UNSPEC_VCVT_SI_U_N, UNSPEC_VCVTH_S_N, UNSPEC_VCVTH_U_N, UNSPEC_VCVTH_S and UNSPEC_VCVTH_U. (vcvth_op): New. (fp16_rnd_str): New. (fp16_rnd_insn): New. * config/arm/unspecs.md (UNSPEC_VCVT_HF_S_N): New. (UNSPEC_VCVT_HF_U_N): New. (UNSPEC_VCVT_SI_S_N): New. (UNSPEC_VCVT_SI_U_N): New. (UNSPEC_VCVTH_S): New. (UNSPEC_VCVTH_U): New. (UNSPEC_VCVTA_S): New. (UNSPEC_VCVTA_U): New. (UNSPEC_VCVTM_S): New. (UNSPEC_VCVTM_U): New. (UNSPEC_VCVTN_S): New. (UNSPEC_VCVTN_U): New. (UNSPEC_VCVTP_S): New. (UNSPEC_VCVTP_U): New. (UNSPEC_VCVTP_S): New. (UNSPEC_VCVTP_U): New. (UNSPEC_VRND): New. (UNSPEC_VRNDA): New. (UNSPEC_VRNDI): New. (UNSPEC_VRNDM): New. (UNSPEC_VRNDN): New. (UNSPEC_VRNDP): New. (UNSPEC_VRNDX): New. * config/arm/vfp.md (<absneg_str>hf2): New. (neon_vabshf): New. (neon_v<fp16_rnd_str>hf): New. (neon_vrndihf): New. (addhf3): New. (subhf3): New. (divhf3): New. (mulhf3): New. (*mulsf3neghf_vfp): New. (*negmulhf3_vfp): New. (*mulsf3addhf_vfp): New. (*mulhf3subhf_vfp): New. (*mulhf3neghfaddhf_vfp): New. (*mulhf3neghfsubhf_vfp): New. (fmahf4): New. (neon_vfmahf): New. (fmsubhf4_fp16): New. (neon_vfmshf): New. (*fnmsubhf4): New. (*fnmaddhf4): New. (neon_vsqrthf): New. (neon_vrsqrtshf): New. (FCVT): Move to iterators.md. (FCVTI32typename): Likewise. (neon_vcvth<sup>hf): New. (neon_vcvth<sup>si): New. (neon_vcvth<sup>_nhf_unspec): New. (neon_vcvth<sup>_nhf): New. (neon_vcvth<sup>_nsi_unspec): New. (neon_vcvth<sup>_nsi): New. (neon_vcvt<vcvth_op>h<sup>si): New. (neon_<fmaxmin_op>hf): New. testsuite/ 2016-09-23 Matthew Wahab <matthew.wahab@arm.com> * gcc.target/arm/armv8_2-fp16-arith-1.c: New. * gcc.target/arm/armv8_2-fp16-conv-1.c: New. From-SVN: r240411
This commit is contained in:
parent
e2080e79be
commit
d403b8d4e8
@ -1,3 +1,81 @@
|
||||
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* config/arm/iterators.md (Code iterators): Fix some white-space
|
||||
in the comments.
|
||||
(GLTE): New.
|
||||
(ABSNEG): New
|
||||
(FCVT): Moved from vfp.md.
|
||||
(VCVT_HF_US_N): New.
|
||||
(VCVT_SI_US_N): New.
|
||||
(VCVT_HF_US): New.
|
||||
(VCVTH_US): New.
|
||||
(FP16_RND): New.
|
||||
(absneg_str): New.
|
||||
(FCVTI32typename): Moved from vfp.md.
|
||||
(sup): Add UNSPEC_VCVTA_S, UNSPEC_VCVTA_U, UNSPEC_VCVTM_S,
|
||||
UNSPEC_VCVTM_U, UNSPEC_VCVTN_S, UNSPEC_VCVTN_U, UNSPEC_VCVTP_S,
|
||||
UNSPEC_VCVTP_U, UNSPEC_VCVT_HF_S_N, UNSPEC_VCVT_HF_U_N,
|
||||
UNSPEC_VCVT_SI_S_N, UNSPEC_VCVT_SI_U_N, UNSPEC_VCVTH_S_N,
|
||||
UNSPEC_VCVTH_U_N, UNSPEC_VCVTH_S and UNSPEC_VCVTH_U.
|
||||
(vcvth_op): New.
|
||||
(fp16_rnd_str): New.
|
||||
(fp16_rnd_insn): New.
|
||||
* config/arm/unspecs.md (UNSPEC_VCVT_HF_S_N): New.
|
||||
(UNSPEC_VCVT_HF_U_N): New.
|
||||
(UNSPEC_VCVT_SI_S_N): New.
|
||||
(UNSPEC_VCVT_SI_U_N): New.
|
||||
(UNSPEC_VCVTH_S): New.
|
||||
(UNSPEC_VCVTH_U): New.
|
||||
(UNSPEC_VCVTA_S): New.
|
||||
(UNSPEC_VCVTA_U): New.
|
||||
(UNSPEC_VCVTM_S): New.
|
||||
(UNSPEC_VCVTM_U): New.
|
||||
(UNSPEC_VCVTN_S): New.
|
||||
(UNSPEC_VCVTN_U): New.
|
||||
(UNSPEC_VCVTP_S): New.
|
||||
(UNSPEC_VCVTP_U): New.
|
||||
(UNSPEC_VCVTP_S): New.
|
||||
(UNSPEC_VCVTP_U): New.
|
||||
(UNSPEC_VRND): New.
|
||||
(UNSPEC_VRNDA): New.
|
||||
(UNSPEC_VRNDI): New.
|
||||
(UNSPEC_VRNDM): New.
|
||||
(UNSPEC_VRNDN): New.
|
||||
(UNSPEC_VRNDP): New.
|
||||
(UNSPEC_VRNDX): New.
|
||||
* config/arm/vfp.md (<absneg_str>hf2): New.
|
||||
(neon_vabshf): New.
|
||||
(neon_v<fp16_rnd_str>hf): New.
|
||||
(neon_vrndihf): New.
|
||||
(addhf3): New.
|
||||
(subhf3): New.
|
||||
(divhf3): New.
|
||||
(mulhf3): New.
|
||||
(*mulsf3neghf_vfp): New.
|
||||
(*negmulhf3_vfp): New.
|
||||
(*mulsf3addhf_vfp): New.
|
||||
(*mulhf3subhf_vfp): New.
|
||||
(*mulhf3neghfaddhf_vfp): New.
|
||||
(*mulhf3neghfsubhf_vfp): New.
|
||||
(fmahf4): New.
|
||||
(neon_vfmahf): New.
|
||||
(fmsubhf4_fp16): New.
|
||||
(neon_vfmshf): New.
|
||||
(*fnmsubhf4): New.
|
||||
(*fnmaddhf4): New.
|
||||
(neon_vsqrthf): New.
|
||||
(neon_vrsqrtshf): New.
|
||||
(FCVT): Move to iterators.md.
|
||||
(FCVTI32typename): Likewise.
|
||||
(neon_vcvth<sup>hf): New.
|
||||
(neon_vcvth<sup>si): New.
|
||||
(neon_vcvth<sup>_nhf_unspec): New.
|
||||
(neon_vcvth<sup>_nhf): New.
|
||||
(neon_vcvth<sup>_nsi_unspec): New.
|
||||
(neon_vcvth<sup>_nsi): New.
|
||||
(neon_vcvt<vcvth_op>h<sup>si): New.
|
||||
(neon_<fmaxmin_op>hf): New.
|
||||
|
||||
2016-09-23 Dominik Vogt <vogt@linux.vnet.ibm.com>
|
||||
|
||||
* config/s390/s390.md (bitoff, bitoff_plus): Neq mode attributes.
|
||||
|
@ -199,14 +199,17 @@
|
||||
;; Code iterators
|
||||
;;----------------------------------------------------------------------------
|
||||
|
||||
;; A list of condition codes used in compare instructions where
|
||||
;; the carry flag from the addition is used instead of doing the
|
||||
;; A list of condition codes used in compare instructions where
|
||||
;; the carry flag from the addition is used instead of doing the
|
||||
;; compare a second time.
|
||||
(define_code_iterator LTUGEU [ltu geu])
|
||||
|
||||
;; The signed gt, ge comparisons
|
||||
(define_code_iterator GTGE [gt ge])
|
||||
|
||||
;; The signed gt, ge, lt, le comparisons
|
||||
(define_code_iterator GLTE [gt ge lt le])
|
||||
|
||||
;; The unsigned gt, ge comparisons
|
||||
(define_code_iterator GTUGEU [gtu geu])
|
||||
|
||||
@ -235,6 +238,12 @@
|
||||
;; Binary operators whose second operand can be shifted.
|
||||
(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and])
|
||||
|
||||
;; Operations on the sign of a number.
|
||||
(define_code_iterator ABSNEG [abs neg])
|
||||
|
||||
;; Conversions.
|
||||
(define_code_iterator FCVT [unsigned_float float])
|
||||
|
||||
;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
|
||||
;; a stack pointer opoerand. The minus operation is a candidate for an rsub
|
||||
;; and hence only plus is supported.
|
||||
@ -330,6 +339,22 @@
|
||||
|
||||
(define_int_iterator VCVT_US_N [UNSPEC_VCVT_S_N UNSPEC_VCVT_U_N])
|
||||
|
||||
(define_int_iterator VCVT_HF_US_N [UNSPEC_VCVT_HF_S_N UNSPEC_VCVT_HF_U_N])
|
||||
|
||||
(define_int_iterator VCVT_SI_US_N [UNSPEC_VCVT_SI_S_N UNSPEC_VCVT_SI_U_N])
|
||||
|
||||
(define_int_iterator VCVT_HF_US [UNSPEC_VCVTA_S UNSPEC_VCVTA_U
|
||||
UNSPEC_VCVTM_S UNSPEC_VCVTM_U
|
||||
UNSPEC_VCVTN_S UNSPEC_VCVTN_U
|
||||
UNSPEC_VCVTP_S UNSPEC_VCVTP_U])
|
||||
|
||||
(define_int_iterator VCVTH_US [UNSPEC_VCVTH_S UNSPEC_VCVTH_U])
|
||||
|
||||
;; Operators for FP16 instructions.
|
||||
(define_int_iterator FP16_RND [UNSPEC_VRND UNSPEC_VRNDA
|
||||
UNSPEC_VRNDM UNSPEC_VRNDN
|
||||
UNSPEC_VRNDP UNSPEC_VRNDX])
|
||||
|
||||
(define_int_iterator VQMOVN [UNSPEC_VQMOVN_S UNSPEC_VQMOVN_U])
|
||||
|
||||
(define_int_iterator VMOVL [UNSPEC_VMOVL_S UNSPEC_VMOVL_U])
|
||||
@ -687,6 +712,12 @@
|
||||
(define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
|
||||
(define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
|
||||
|
||||
;; String reprentations of operations on the sign of a number.
|
||||
(define_code_attr absneg_str [(abs "abs") (neg "neg")])
|
||||
|
||||
;; Conversions.
|
||||
(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Int attributes
|
||||
;;----------------------------------------------------------------------------
|
||||
@ -718,7 +749,13 @@
|
||||
(UNSPEC_VPMAX "s") (UNSPEC_VPMAX_U "u")
|
||||
(UNSPEC_VPMIN "s") (UNSPEC_VPMIN_U "u")
|
||||
(UNSPEC_VCVT_S "s") (UNSPEC_VCVT_U "u")
|
||||
(UNSPEC_VCVTA_S "s") (UNSPEC_VCVTA_U "u")
|
||||
(UNSPEC_VCVTM_S "s") (UNSPEC_VCVTM_U "u")
|
||||
(UNSPEC_VCVTN_S "s") (UNSPEC_VCVTN_U "u")
|
||||
(UNSPEC_VCVTP_S "s") (UNSPEC_VCVTP_U "u")
|
||||
(UNSPEC_VCVT_S_N "s") (UNSPEC_VCVT_U_N "u")
|
||||
(UNSPEC_VCVT_HF_S_N "s") (UNSPEC_VCVT_HF_U_N "u")
|
||||
(UNSPEC_VCVT_SI_S_N "s") (UNSPEC_VCVT_SI_U_N "u")
|
||||
(UNSPEC_VQMOVN_S "s") (UNSPEC_VQMOVN_U "u")
|
||||
(UNSPEC_VMOVL_S "s") (UNSPEC_VMOVL_U "u")
|
||||
(UNSPEC_VSHL_S "s") (UNSPEC_VSHL_U "u")
|
||||
@ -733,9 +770,25 @@
|
||||
(UNSPEC_VSHLL_S_N "s") (UNSPEC_VSHLL_U_N "u")
|
||||
(UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u")
|
||||
(UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
|
||||
|
||||
(UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
|
||||
])
|
||||
|
||||
(define_int_attr vcvth_op
|
||||
[(UNSPEC_VCVTA_S "a") (UNSPEC_VCVTA_U "a")
|
||||
(UNSPEC_VCVTM_S "m") (UNSPEC_VCVTM_U "m")
|
||||
(UNSPEC_VCVTN_S "n") (UNSPEC_VCVTN_U "n")
|
||||
(UNSPEC_VCVTP_S "p") (UNSPEC_VCVTP_U "p")])
|
||||
|
||||
(define_int_attr fp16_rnd_str
|
||||
[(UNSPEC_VRND "rnd") (UNSPEC_VRNDA "rnda")
|
||||
(UNSPEC_VRNDM "rndm") (UNSPEC_VRNDN "rndn")
|
||||
(UNSPEC_VRNDP "rndp") (UNSPEC_VRNDX "rndx")])
|
||||
|
||||
(define_int_attr fp16_rnd_insn
|
||||
[(UNSPEC_VRND "vrintz") (UNSPEC_VRNDA "vrinta")
|
||||
(UNSPEC_VRNDM "vrintm") (UNSPEC_VRNDN "vrintn")
|
||||
(UNSPEC_VRNDP "vrintp") (UNSPEC_VRNDX "vrintx")])
|
||||
|
||||
(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt")
|
||||
(UNSPEC_VCGE "ge") (UNSPEC_VCLE "le")
|
||||
(UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge")
|
||||
|
@ -203,6 +203,20 @@
|
||||
UNSPEC_VCVT_U
|
||||
UNSPEC_VCVT_S_N
|
||||
UNSPEC_VCVT_U_N
|
||||
UNSPEC_VCVT_HF_S_N
|
||||
UNSPEC_VCVT_HF_U_N
|
||||
UNSPEC_VCVT_SI_S_N
|
||||
UNSPEC_VCVT_SI_U_N
|
||||
UNSPEC_VCVTH_S
|
||||
UNSPEC_VCVTH_U
|
||||
UNSPEC_VCVTA_S
|
||||
UNSPEC_VCVTA_U
|
||||
UNSPEC_VCVTM_S
|
||||
UNSPEC_VCVTM_U
|
||||
UNSPEC_VCVTN_S
|
||||
UNSPEC_VCVTN_U
|
||||
UNSPEC_VCVTP_S
|
||||
UNSPEC_VCVTP_U
|
||||
UNSPEC_VEXT
|
||||
UNSPEC_VHADD_S
|
||||
UNSPEC_VHADD_U
|
||||
@ -365,5 +379,12 @@
|
||||
UNSPEC_NVRINTN
|
||||
UNSPEC_VQRDMLAH
|
||||
UNSPEC_VQRDMLSH
|
||||
UNSPEC_VRND
|
||||
UNSPEC_VRNDA
|
||||
UNSPEC_VRNDI
|
||||
UNSPEC_VRNDM
|
||||
UNSPEC_VRNDN
|
||||
UNSPEC_VRNDP
|
||||
UNSPEC_VRNDX
|
||||
])
|
||||
|
||||
|
@ -937,9 +937,63 @@
|
||||
(set_attr "type" "ffarithd")]
|
||||
)
|
||||
|
||||
;; ABS and NEG for FP16.
|
||||
(define_insn "<absneg_str>hf2"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=w")
|
||||
(ABSNEG:HF (match_operand:HF 1 "s_register_operand" "w")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"v<absneg_str>.f16\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "ffariths")]
|
||||
)
|
||||
|
||||
(define_expand "neon_vabshf"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand")
|
||||
(abs:HF (match_operand:HF 1 "s_register_operand")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
emit_insn (gen_abshf2 (operands[0], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; VRND for FP16.
|
||||
(define_insn "neon_v<fp16_rnd_str>hf"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=w")
|
||||
(unspec:HF
|
||||
[(match_operand:HF 1 "s_register_operand" "w")]
|
||||
FP16_RND))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"<fp16_rnd_insn>.f16\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "neon_fp_round_s")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vrndihf"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=w")
|
||||
(unspec:HF
|
||||
[(match_operand:HF 1 "s_register_operand" "w")]
|
||||
UNSPEC_VRNDI))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vrintr.f16\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "neon_fp_round_s")]
|
||||
)
|
||||
|
||||
;; Arithmetic insns
|
||||
|
||||
(define_insn "addhf3"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand" "=w")
|
||||
(plus:HF
|
||||
(match_operand:HF 1 "s_register_operand" "w")
|
||||
(match_operand:HF 2 "s_register_operand" "w")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vadd.f16\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fadds")]
|
||||
)
|
||||
|
||||
(define_insn "*addsf3_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(plus:SF (match_operand:SF 1 "s_register_operand" "t")
|
||||
@ -962,6 +1016,17 @@
|
||||
(set_attr "type" "faddd")]
|
||||
)
|
||||
|
||||
(define_insn "subhf3"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand" "=w")
|
||||
(minus:HF
|
||||
(match_operand:HF 1 "s_register_operand" "w")
|
||||
(match_operand:HF 2 "s_register_operand" "w")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vsub.f16\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fadds")]
|
||||
)
|
||||
|
||||
(define_insn "*subsf3_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
@ -988,6 +1053,19 @@
|
||||
|
||||
;; Division insns
|
||||
|
||||
;; FP16 Division.
|
||||
(define_insn "divhf3"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand" "=w")
|
||||
(div:HF
|
||||
(match_operand:HF 1 "s_register_operand" "w")
|
||||
(match_operand:HF 2 "s_register_operand" "w")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vdiv.f16\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fdivs")]
|
||||
)
|
||||
|
||||
; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input
|
||||
; operands, so mark the output as early clobber for VFPv2 on ARMv5 or
|
||||
; earlier.
|
||||
@ -1018,6 +1096,17 @@
|
||||
|
||||
;; Multiplication insns
|
||||
|
||||
(define_insn "mulhf3"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand" "=w")
|
||||
(mult:HF (match_operand:HF 1 "s_register_operand" "w")
|
||||
(match_operand:HF 2 "s_register_operand" "w")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vmul.f16\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmuls")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(mult:SF (match_operand:SF 1 "s_register_operand" "t")
|
||||
@ -1040,6 +1129,26 @@
|
||||
(set_attr "type" "fmuld")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3neghf_vfp"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(mult:HF (neg:HF (match_operand:HF 1 "s_register_operand" "t"))
|
||||
(match_operand:HF 2 "s_register_operand" "t")))]
|
||||
"TARGET_VFP_FP16INST && !flag_rounding_math"
|
||||
"vnmul.f16\\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmuls")]
|
||||
)
|
||||
|
||||
(define_insn "*negmulhf3_vfp"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(neg:HF (mult:HF (match_operand:HF 1 "s_register_operand" "t")
|
||||
(match_operand:HF 2 "s_register_operand" "t"))))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vnmul.f16\\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmuls")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3negsf_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t"))
|
||||
@ -1089,6 +1198,18 @@
|
||||
;; Multiply-accumulate insns
|
||||
|
||||
;; 0 = 1 * 2 + 0
|
||||
(define_insn "*mulsf3addhf_vfp"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(plus:HF
|
||||
(mult:HF (match_operand:HF 2 "s_register_operand" "t")
|
||||
(match_operand:HF 3 "s_register_operand" "t"))
|
||||
(match_operand:HF 1 "s_register_operand" "0")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vmla.f16\\t%0, %2, %3"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmacs")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3addsf_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
|
||||
@ -1114,6 +1235,17 @@
|
||||
)
|
||||
|
||||
;; 0 = 1 * 2 - 0
|
||||
(define_insn "*mulhf3subhf_vfp"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(minus:HF (mult:HF (match_operand:HF 2 "s_register_operand" "t")
|
||||
(match_operand:HF 3 "s_register_operand" "t"))
|
||||
(match_operand:HF 1 "s_register_operand" "0")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vnmls.f16\\t%0, %2, %3"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmacs")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3subsf_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
|
||||
@ -1139,6 +1271,17 @@
|
||||
)
|
||||
|
||||
;; 0 = -(1 * 2) + 0
|
||||
(define_insn "*mulhf3neghfaddhf_vfp"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(minus:HF (match_operand:HF 1 "s_register_operand" "0")
|
||||
(mult:HF (match_operand:HF 2 "s_register_operand" "t")
|
||||
(match_operand:HF 3 "s_register_operand" "t"))))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vmls.f16\\t%0, %2, %3"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmacs")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3negsfaddsf_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(minus:SF (match_operand:SF 1 "s_register_operand" "0")
|
||||
@ -1165,6 +1308,18 @@
|
||||
|
||||
|
||||
;; 0 = -(1 * 2) - 0
|
||||
(define_insn "*mulhf3neghfsubhf_vfp"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(minus:HF (mult:HF
|
||||
(neg:HF (match_operand:HF 2 "s_register_operand" "t"))
|
||||
(match_operand:HF 3 "s_register_operand" "t"))
|
||||
(match_operand:HF 1 "s_register_operand" "0")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vnmla.f16\\t%0, %2, %3"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fmacs")]
|
||||
)
|
||||
|
||||
(define_insn "*mulsf3negsfsubsf_vfp"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(minus:SF (mult:SF
|
||||
@ -1193,6 +1348,30 @@
|
||||
|
||||
;; Fused-multiply-accumulate
|
||||
|
||||
(define_insn "fmahf4"
|
||||
[(set (match_operand:HF 0 "register_operand" "=w")
|
||||
(fma:HF
|
||||
(match_operand:HF 1 "register_operand" "w")
|
||||
(match_operand:HF 2 "register_operand" "w")
|
||||
(match_operand:HF 3 "register_operand" "0")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vfma.f16\\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "ffmas")]
|
||||
)
|
||||
|
||||
(define_expand "neon_vfmahf"
|
||||
[(match_operand:HF 0 "s_register_operand")
|
||||
(match_operand:HF 1 "s_register_operand")
|
||||
(match_operand:HF 2 "s_register_operand")
|
||||
(match_operand:HF 3 "s_register_operand")]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
emit_insn (gen_fmahf4 (operands[0], operands[2], operands[3],
|
||||
operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "fma<SDF:mode>4"
|
||||
[(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
|
||||
(fma:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
|
||||
@ -1205,6 +1384,30 @@
|
||||
(set_attr "type" "ffma<vfp_type>")]
|
||||
)
|
||||
|
||||
(define_insn "fmsubhf4_fp16"
|
||||
[(set (match_operand:HF 0 "register_operand" "=w")
|
||||
(fma:HF
|
||||
(neg:HF (match_operand:HF 1 "register_operand" "w"))
|
||||
(match_operand:HF 2 "register_operand" "w")
|
||||
(match_operand:HF 3 "register_operand" "0")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vfms.f16\\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "ffmas")]
|
||||
)
|
||||
|
||||
(define_expand "neon_vfmshf"
|
||||
[(match_operand:HF 0 "s_register_operand")
|
||||
(match_operand:HF 1 "s_register_operand")
|
||||
(match_operand:HF 2 "s_register_operand")
|
||||
(match_operand:HF 3 "s_register_operand")]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
emit_insn (gen_fmsubhf4_fp16 (operands[0], operands[2], operands[3],
|
||||
operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "*fmsub<SDF:mode>4"
|
||||
[(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
|
||||
(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"
|
||||
@ -1218,6 +1421,17 @@
|
||||
(set_attr "type" "ffma<vfp_type>")]
|
||||
)
|
||||
|
||||
(define_insn "*fnmsubhf4"
|
||||
[(set (match_operand:HF 0 "register_operand" "=w")
|
||||
(fma:HF (match_operand:HF 1 "register_operand" "w")
|
||||
(match_operand:HF 2 "register_operand" "w")
|
||||
(neg:HF (match_operand:HF 3 "register_operand" "0"))))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vfnms.f16\\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "ffmas")]
|
||||
)
|
||||
|
||||
(define_insn "*fnmsub<SDF:mode>4"
|
||||
[(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
|
||||
(fma:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
|
||||
@ -1230,6 +1444,17 @@
|
||||
(set_attr "type" "ffma<vfp_type>")]
|
||||
)
|
||||
|
||||
(define_insn "*fnmaddhf4"
|
||||
[(set (match_operand:HF 0 "register_operand" "=w")
|
||||
(fma:HF (neg:HF (match_operand:HF 1 "register_operand" "w"))
|
||||
(match_operand:HF 2 "register_operand" "w")
|
||||
(neg:HF (match_operand:HF 3 "register_operand" "0"))))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vfnma.f16\\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "ffmas")]
|
||||
)
|
||||
|
||||
(define_insn "*fnmadd<SDF:mode>4"
|
||||
[(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
|
||||
(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"
|
||||
@ -1372,6 +1597,27 @@
|
||||
|
||||
;; Sqrt insns.
|
||||
|
||||
(define_insn "neon_vsqrthf"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=w")
|
||||
(sqrt:HF (match_operand:HF 1 "s_register_operand" "w")))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vsqrt.f16\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fsqrts")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vrsqrtshf"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand" "=w")
|
||||
(unspec:HF [(match_operand:HF 1 "s_register_operand" "w")
|
||||
(match_operand:HF 2 "s_register_operand" "w")]
|
||||
UNSPEC_VRSQRTS))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vrsqrts.f16\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "fsqrts")]
|
||||
)
|
||||
|
||||
; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input
|
||||
; operands, so mark the output as early clobber for VFPv2 on ARMv5 or
|
||||
; earlier.
|
||||
@ -1528,9 +1774,6 @@
|
||||
)
|
||||
|
||||
;; Fixed point to floating point conversions.
|
||||
(define_code_iterator FCVT [unsigned_float float])
|
||||
(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")])
|
||||
|
||||
(define_insn "*combine_vcvt_f32_<FCVTI32typename>"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0"))
|
||||
@ -1575,6 +1818,125 @@
|
||||
(set_attr "type" "f_cvtf2i")]
|
||||
)
|
||||
|
||||
;; FP16 conversions.
|
||||
(define_insn "neon_vcvth<sup>hf"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=w")
|
||||
(unspec:HF
|
||||
[(match_operand:SI 1 "s_register_operand" "w")]
|
||||
VCVTH_US))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vcvt.f16.<sup>%#32\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "f_cvti2f")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vcvth<sup>si"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=w")
|
||||
(unspec:SI
|
||||
[(match_operand:HF 1 "s_register_operand" "w")]
|
||||
VCVTH_US))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vcvt.<sup>%#32.f16\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "f_cvtf2i")]
|
||||
)
|
||||
|
||||
;; The neon_vcvth<sup>_nhf patterns are used to generate the instruction for the
|
||||
;; vcvth_n_f16_<sup>32 arm_fp16 intrinsics. They are complicated by the
|
||||
;; hardware requirement that the source and destination registers are the same
|
||||
;; despite having different machine modes. The approach is to use a temporary
|
||||
;; register for the conversion and move that to the correct destination.
|
||||
|
||||
;; Generate an unspec pattern for the intrinsic.
|
||||
(define_insn "neon_vcvth<sup>_nhf_unspec"
|
||||
[(set
|
||||
(match_operand:SI 0 "s_register_operand" "=w")
|
||||
(unspec:SI
|
||||
[(match_operand:SI 1 "s_register_operand" "0")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
VCVT_HF_US_N))
|
||||
(set
|
||||
(match_operand:HF 3 "s_register_operand" "=w")
|
||||
(float_truncate:HF (float:SF (match_dup 0))))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
neon_const_bounds (operands[2], 1, 33);
|
||||
return "vcvt.f16.<sup>32\t%0, %0, %2\;vmov.f32\t%3, %0";
|
||||
}
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "f_cvti2f")]
|
||||
)
|
||||
|
||||
;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics.
|
||||
(define_expand "neon_vcvth<sup>_nhf"
|
||||
[(match_operand:HF 0 "s_register_operand")
|
||||
(unspec:HF [(match_operand:SI 1 "s_register_operand")
|
||||
(match_operand:SI 2 "immediate_operand")]
|
||||
VCVT_HF_US_N)]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (SImode);
|
||||
|
||||
neon_const_bounds (operands[2], 1, 33);
|
||||
|
||||
emit_move_insn (op1, operands[1]);
|
||||
emit_insn (gen_neon_vcvth<sup>_nhf_unspec (op1, op1, operands[2],
|
||||
operands[0]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; The neon_vcvth<sup>_nsi patterns are used to generate the instruction for the
|
||||
;; vcvth_n_<sup>32_f16 arm_fp16 intrinsics. They have the same restrictions and
|
||||
;; are implemented in the same way as the neon_vcvth<sup>_nhf patterns.
|
||||
|
||||
;; Generate an unspec pattern, constraining the registers.
|
||||
(define_insn "neon_vcvth<sup>_nsi_unspec"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=w")
|
||||
(unspec:SI
|
||||
[(fix:SI
|
||||
(fix:SF
|
||||
(float_extend:SF
|
||||
(match_operand:HF 1 "s_register_operand" "w"))))
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
VCVT_SI_US_N))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
neon_const_bounds (operands[2], 1, 33);
|
||||
return "vmov.f32\t%0, %1\;vcvt.<sup>%#32.f16\t%0, %0, %2";
|
||||
}
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "f_cvtf2i")]
|
||||
)
|
||||
|
||||
;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics.
|
||||
(define_expand "neon_vcvth<sup>_nsi"
|
||||
[(match_operand:SI 0 "s_register_operand")
|
||||
(unspec:SI
|
||||
[(match_operand:HF 1 "s_register_operand")
|
||||
(match_operand:SI 2 "immediate_operand")]
|
||||
VCVT_SI_US_N)]
|
||||
"TARGET_VFP_FP16INST"
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (SImode);
|
||||
|
||||
neon_const_bounds (operands[2], 1, 33);
|
||||
emit_insn (gen_neon_vcvth<sup>_nsi_unspec (op1, operands[1], operands[2]));
|
||||
emit_move_insn (operands[0], op1);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "neon_vcvt<vcvth_op>h<sup>si"
|
||||
[(set
|
||||
(match_operand:SI 0 "s_register_operand" "=w")
|
||||
(unspec:SI
|
||||
[(match_operand:HF 1 "s_register_operand" "w")]
|
||||
VCVT_HF_US))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"vcvt<vcvth_op>.<sup>%#32.f16\t%0, %1"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "f_cvtf2i")]
|
||||
)
|
||||
|
||||
;; Store multiple insn used in function prologue.
|
||||
(define_insn "*push_multi_vfp"
|
||||
[(match_parallel 2 "multi_register_push"
|
||||
@ -1644,6 +2006,20 @@
|
||||
)
|
||||
|
||||
;; Scalar forms for the IEEE-754 fmax()/fmin() functions
|
||||
|
||||
(define_insn "neon_<fmaxmin_op>hf"
|
||||
[(set
|
||||
(match_operand:HF 0 "s_register_operand" "=w")
|
||||
(unspec:HF
|
||||
[(match_operand:HF 1 "s_register_operand" "w")
|
||||
(match_operand:HF 2 "s_register_operand" "w")]
|
||||
VMAXMINFNM))]
|
||||
"TARGET_VFP_FP16INST"
|
||||
"<fmaxmin_op>.f16\t%0, %1, %2"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "f_minmaxs")]
|
||||
)
|
||||
|
||||
(define_insn "<fmaxmin><mode>3"
|
||||
[(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>")
|
||||
(unspec:SDF [(match_operand:SDF 1 "s_register_operand" "<F_constraint>")
|
||||
|
@ -1,3 +1,8 @@
|
||||
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* gcc.target/arm/armv8_2-fp16-arith-1.c: New.
|
||||
* gcc.target/arm/armv8_2-fp16-conv-1.c: New.
|
||||
|
||||
2016-09-23 Dominik Vogt <vogt@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/s390/md/rXsbg_mode_sXl.c: Adapt expected assembly
|
||||
|
68
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
Normal file
68
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
Normal file
@ -0,0 +1,68 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
|
||||
/* { dg-options "-O2 -ffast-math" } */
|
||||
/* { dg-add-options arm_v8_2a_fp16_scalar } */
|
||||
|
||||
/* Test instructions generated for half-precision arithmetic. */
|
||||
|
||||
typedef __fp16 float16_t;
|
||||
typedef __simd64_float16_t float16x4_t;
|
||||
typedef __simd128_float16_t float16x8_t;
|
||||
|
||||
float16_t
|
||||
fp16_abs (float16_t a)
|
||||
{
|
||||
return (a < 0) ? -a : a;
|
||||
}
|
||||
|
||||
#define TEST_UNOP(NAME, OPERATOR, TY) \
|
||||
TY test_##NAME##_##TY (TY a) \
|
||||
{ \
|
||||
return OPERATOR (a); \
|
||||
}
|
||||
|
||||
#define TEST_BINOP(NAME, OPERATOR, TY) \
|
||||
TY test_##NAME##_##TY (TY a, TY b) \
|
||||
{ \
|
||||
return a OPERATOR b; \
|
||||
}
|
||||
|
||||
#define TEST_CMP(NAME, OPERATOR, RTY, TY) \
|
||||
RTY test_##NAME##_##TY (TY a, TY b) \
|
||||
{ \
|
||||
return a OPERATOR b; \
|
||||
}
|
||||
|
||||
/* Scalars. */
|
||||
|
||||
TEST_UNOP (neg, -, float16_t)
|
||||
TEST_UNOP (abs, fp16_abs, float16_t)
|
||||
|
||||
TEST_BINOP (add, +, float16_t)
|
||||
TEST_BINOP (sub, -, float16_t)
|
||||
TEST_BINOP (mult, *, float16_t)
|
||||
TEST_BINOP (div, /, float16_t)
|
||||
|
||||
TEST_CMP (equal, ==, int, float16_t)
|
||||
TEST_CMP (unequal, !=, int, float16_t)
|
||||
TEST_CMP (lessthan, <, int, float16_t)
|
||||
TEST_CMP (greaterthan, >, int, float16_t)
|
||||
TEST_CMP (lessthanequal, <=, int, float16_t)
|
||||
TEST_CMP (greaterthanqual, >=, int, float16_t)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {vadd\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vsub\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vmul\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vdiv\.f32} } } */
|
||||
/* { dg-final { scan-assembler-not {vcmp\.f16} } } */
|
||||
/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */
|
101
gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c
Normal file
101
gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c
Normal file
@ -0,0 +1,101 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-add-options arm_v8_2a_fp16_scalar } */
|
||||
|
||||
/* Test ARMv8.2 FP16 conversions. */
|
||||
#include <arm_fp16.h>
|
||||
|
||||
float
|
||||
f16_to_f32 (__fp16 a)
|
||||
{
|
||||
return (float)a;
|
||||
}
|
||||
|
||||
float
|
||||
f16_to_pf32 (__fp16* a)
|
||||
{
|
||||
return (float)*a;
|
||||
}
|
||||
|
||||
short
|
||||
f16_to_s16 (__fp16 a)
|
||||
{
|
||||
return (short)a;
|
||||
}
|
||||
|
||||
short
|
||||
pf16_to_s16 (__fp16* a)
|
||||
{
|
||||
return (short)*a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvtb\.f32\.f16\ts[0-9]+, s[0-9]+} 4 } } */
|
||||
|
||||
__fp16
|
||||
f32_to_f16 (float a)
|
||||
{
|
||||
return (__fp16)a;
|
||||
}
|
||||
|
||||
void
|
||||
f32_to_pf16 (__fp16* x, float a)
|
||||
{
|
||||
*x = (__fp16)a;
|
||||
}
|
||||
|
||||
__fp16
|
||||
s16_to_f16 (short a)
|
||||
{
|
||||
return (__fp16)a;
|
||||
}
|
||||
|
||||
void
|
||||
s16_to_pf16 (__fp16* x, short a)
|
||||
{
|
||||
*x = (__fp16)a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvtb\.f16\.f32\ts[0-9]+, s[0-9]+} 4 } } */
|
||||
|
||||
float
|
||||
s16_to_f32 (short a)
|
||||
{
|
||||
return (float)a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+, s[0-9]+} 3 } } */
|
||||
|
||||
short
|
||||
f32_to_s16 (float a)
|
||||
{
|
||||
return (short)a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+, s[0-9]+} 3 } } */
|
||||
|
||||
unsigned short
|
||||
f32_to_u16 (float a)
|
||||
{
|
||||
return (unsigned short)a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvt\.u32\.f32\ts[0-9]+, s[0-9]+} 1 } } */
|
||||
|
||||
short
|
||||
f64_to_s16 (double a)
|
||||
{
|
||||
return (short)a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvt\.s32\.f64\ts[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
unsigned short
|
||||
f64_to_u16 (double a)
|
||||
{
|
||||
return (unsigned short)a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vcvt\.s32\.f64\ts[0-9]+, d[0-9]+} 1 } } */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user