target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL

Implement fp16 versions of the VFP VMLA, VMLS, VNMLS, VNMLA, VNMUL
instructions. (These are all the remaining ones which we implement
via do_vfp_3op_[hsd]p().)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-5-peter.maydell@linaro.org
This commit is contained in:
Peter Maydell 2020-08-28 19:33:13 +01:00
parent 120a0eb3ea
commit e7cb0ded52
4 changed files with 95 additions and 0 deletions

View File

@ -125,6 +125,7 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
DEF_HELPER_1(vfp_negh, f16, f16)
DEF_HELPER_1(vfp_negs, f32, f32)
DEF_HELPER_1(vfp_negd, f64, f64)
DEF_HELPER_1(vfp_abss, f32, f32)

View File

@ -1547,6 +1547,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
return true;
}
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* Note that order of inputs to the add matters for NaNs */
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
tcg_temp_free_i32(tmp);
}
static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* Note that order of inputs to the add matters for NaNs */
@ -1577,6 +1592,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
* VMLS: vd = vd + -(vn * vm)
* Note that order of inputs to the add matters for NaNs.
*/
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
gen_helper_vfp_negh(tmp, tmp);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
tcg_temp_free_i32(tmp);
}
static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@ -1615,6 +1649,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
* VNMLS: -fd + (fn * fm)
* Note that it isn't valid to replace (-A + B) with (B - A) or similar
* plausible looking simplifications because this will give wrong results
* for NaNs.
*/
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
gen_helper_vfp_negh(vd, vd);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
tcg_temp_free_i32(tmp);
}
static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@ -1657,6 +1712,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMLA: -fd + -(fn * fm) */
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
gen_helper_vfp_negh(tmp, tmp);
gen_helper_vfp_negh(vd, vd);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
tcg_temp_free_i32(tmp);
}
static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMLA: -fd + -(fn * fm) */
@ -1706,6 +1778,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}
static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_mulh(vd, vn, vm, fpst);
gen_helper_vfp_negh(vd, vd);
}
static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}
static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */

View File

@ -103,15 +103,19 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
@ -119,6 +123,7 @@ VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d

View File

@ -261,6 +261,11 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
{
return float16_chs(a);
}
float32 VFP_HELPER(neg, s)(float32 a)
{
return float32_chs(a);