target/arm: Implement VFP fp16 for VFP_BINOP operations

Implmeent VFP fp16 support for simple binary-operator VFP insns VADD,
VSUB, VMUL, VDIV, VMINNM and VMAXNM:

 * make the VFP_BINOP() macro generate float16 helpers as well as
   float32 and float64
 * implement a do_vfp_3op_hp() function similar to the existing
   do_vfp_3op_sp()
 * add decode for the half-precision insn patterns

Note that the VFP_BINOP macro use creates a couple of unused helper
functions vfp_maxh and vfp_minh, but they're small so it's not worth
splitting the BINOP operations into "needs halfprec" and "no
halfprec" groups.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-4-peter.maydell@linaro.org
This commit is contained in:
Peter Maydell 2020-08-28 19:33:12 +01:00
parent 02bc236d01
commit 120a0eb3ea
5 changed files with 106 additions and 0 deletions

View File

@ -101,20 +101,28 @@ DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
DEF_HELPER_1(vfp_get_fpscr, i32, env)
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
DEF_HELPER_1(vfp_negs, f32, f32)

View File

@ -1266,6 +1266,54 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
return true;
}
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
int vd, int vn, int vm, bool reads_vd)
{
/*
* Do a half-precision operation. Functionally this is
* the same as do_vfp_3op_sp(), except:
* - it uses the FPST_FPCR_F16
* - it doesn't need the VFP vector handling (fp16 is a
* v8 feature, and in v8 VFP vectors don't exist)
* - it does the aa32_fp16_arith feature test
*/
TCGv_i32 f0, f1, fd;
TCGv_ptr fpst;
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
if (s->vec_len != 0 || s->vec_stride != 0) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
f0 = tcg_temp_new_i32();
f1 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR_F16);
neon_load_reg32(f0, vn);
neon_load_reg32(f1, vm);
if (reads_vd) {
neon_load_reg32(fd, vd);
}
fn(fd, f0, f1, fpst);
neon_store_reg32(fd, vd);
tcg_temp_free_i32(f0);
tcg_temp_free_i32(f1);
tcg_temp_free_i32(fd);
tcg_temp_free_ptr(fpst);
return true;
}
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
int vd, int vn, int vm, bool reads_vd)
{
@ -1643,6 +1691,11 @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}
static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
@ -1677,6 +1730,11 @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}
static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}
static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
@ -1687,6 +1745,11 @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}
static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}
static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
@ -1697,6 +1760,11 @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}
static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}
static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
@ -1707,6 +1775,24 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
if (!dc_isar_feature(aa32_vminmaxnm, s)) {
return false;
}
return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
a->vd, a->vn, a->vm, false);
}
static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
if (!dc_isar_feature(aa32_vminmaxnm, s)) {
return false;
}
return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
if (!dc_isar_feature(aa32_vminmaxnm, s)) {

View File

@ -49,6 +49,9 @@ VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s

View File

@ -115,18 +115,22 @@ VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s
VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s
VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d
VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s
VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s
VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d
VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d

View File

@ -236,6 +236,11 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val)
#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
#define VFP_BINOP(name) \
dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
{ \
float_status *fpst = fpstp; \
return float16_ ## name(a, b, fpst); \
} \
float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
{ \
float_status *fpst = fpstp; \