target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL
Implement fp16 versions of the VFP VMLA, VMLS, VNMLS, VNMLA, VNMUL instructions. (These are all the remaining ones which we implement via do_vfp_3op_[hsd]p().)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-5-peter.maydell@linaro.org
This commit is contained in:
parent
120a0eb3ea
commit
e7cb0ded52
@ -125,6 +125,7 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
|
||||
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
|
||||
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
|
||||
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
|
||||
/*
 * fp16 negate helper: one f16 operand in, f16 result out. Unlike the
 * three-operand min/max helpers declared above, it takes no ptr argument.
 */
DEF_HELPER_1(vfp_negh, f16, f16)
|
||||
DEF_HELPER_1(vfp_negs, f32, f32)
|
||||
DEF_HELPER_1(vfp_negd, f64, f64)
|
||||
DEF_HELPER_1(vfp_abss, f32, f32)
|
||||
|
@ -1547,6 +1547,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Emit the fp16 VMLA sequence: vd = vd + (vn * vm).
 * The multiply and the add are issued as two separate half-precision
 * helper calls, both using fpst.
 */
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    TCGv_i32 product;

    product = tcg_temp_new_i32();
    gen_helper_vfp_mulh(product, vn, vm, fpst);
    /* Keep vd as the first addend: the operand order matters for NaNs */
    gen_helper_vfp_addh(vd, vd, product, fpst);
    tcg_temp_free_i32(product);
}
|
||||
|
||||
/*
 * Decoder entry point for the fp16 VMLA pattern: hands the decoded
 * vd/vn/vm fields and the gen_VMLA_hp emitter to do_vfp_3op_hp() and
 * returns whatever that reports.
 */
static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    bool ret;

    /*
     * NOTE(review): the final argument is true for the accumulating forms
     * and false for VMUL/VNMUL in this file; confirm its exact meaning
     * against do_vfp_3op_hp().
     */
    ret = do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
    return ret;
}
|
||||
|
||||
static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
|
||||
{
|
||||
/* Note that order of inputs to the add matters for NaNs */
|
||||
@ -1577,6 +1592,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
|
||||
return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
|
||||
}
|
||||
|
||||
/*
 * Emit the fp16 VMLS sequence: vd = vd + -(vn * vm).
 * The product is formed first, negated in place, and then added to vd.
 */
static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    TCGv_i32 product;

    product = tcg_temp_new_i32();
    gen_helper_vfp_mulh(product, vn, vm, fpst);
    gen_helper_vfp_negh(product, product);
    /* Keep vd as the first addend: the operand order matters for NaNs */
    gen_helper_vfp_addh(vd, vd, product, fpst);
    tcg_temp_free_i32(product);
}
|
||||
|
||||
/*
 * Decoder entry point for the fp16 VMLS pattern: hands the decoded
 * vd/vn/vm fields and the gen_VMLS_hp emitter to do_vfp_3op_hp() and
 * returns whatever that reports.
 */
static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    bool ret;

    /*
     * NOTE(review): the final argument is true for the accumulating forms
     * and false for VMUL/VNMUL in this file; confirm its exact meaning
     * against do_vfp_3op_hp().
     */
    ret = do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
    return ret;
}
|
||||
|
||||
static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
|
||||
{
|
||||
/*
|
||||
@ -1615,6 +1649,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
|
||||
return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
|
||||
}
|
||||
|
||||
/*
 * Emit the fp16 VNMLS sequence: vd = -vd + (vn * vm).
 *
 * Do not "simplify" (-A + B) into (B - A) or any similar-looking form:
 * such rewrites produce wrong results when NaNs are involved.
 */
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    TCGv_i32 product;

    product = tcg_temp_new_i32();
    gen_helper_vfp_mulh(product, vn, vm, fpst);
    /* Negate the accumulator in place before adding the product to it */
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, product, fpst);
    tcg_temp_free_i32(product);
}
|
||||
|
||||
/*
 * Decoder entry point for the fp16 VNMLS pattern: hands the decoded
 * vd/vn/vm fields and the gen_VNMLS_hp emitter to do_vfp_3op_hp() and
 * returns whatever that reports.
 */
static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    bool ret;

    /*
     * NOTE(review): the final argument is true for the accumulating forms
     * and false for VMUL/VNMUL in this file; confirm its exact meaning
     * against do_vfp_3op_hp().
     */
    ret = do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
    return ret;
}
|
||||
|
||||
static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
|
||||
{
|
||||
/*
|
||||
@ -1657,6 +1712,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
|
||||
return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
|
||||
}
|
||||
|
||||
/*
 * Emit the fp16 VNMLA sequence: vd = -vd + -(vn * vm).
 * Both the product and the accumulator are negated before the add.
 */
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    TCGv_i32 product;

    product = tcg_temp_new_i32();
    gen_helper_vfp_mulh(product, vn, vm, fpst);
    gen_helper_vfp_negh(product, product);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, product, fpst);
    tcg_temp_free_i32(product);
}
|
||||
|
||||
/*
 * Decoder entry point for the fp16 VNMLA pattern: hands the decoded
 * vd/vn/vm fields and the gen_VNMLA_hp emitter to do_vfp_3op_hp() and
 * returns whatever that reports.
 */
static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    bool ret;

    /*
     * NOTE(review): the final argument is true for the accumulating forms
     * and false for VMUL/VNMUL in this file; confirm its exact meaning
     * against do_vfp_3op_hp().
     */
    ret = do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
    return ret;
}
|
||||
|
||||
static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
|
||||
{
|
||||
/* VNMLA: -fd + -(fn * fm) */
|
||||
@ -1706,6 +1778,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
|
||||
return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
|
||||
}
|
||||
|
||||
/*
 * Emit the fp16 VNMUL sequence: vd = -(vn * vm).
 * The product is written straight into vd and then negated in place,
 * so no scratch temporary is needed.
 */
static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Step 1: vd = vn * vm */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);

    /* Step 2: vd = -vd */
    gen_helper_vfp_negh(vd, vd);
}
|
||||
|
||||
/*
 * Decoder entry point for the fp16 VNMUL pattern: hands the decoded
 * vd/vn/vm fields and the gen_VNMUL_hp emitter to do_vfp_3op_hp() and
 * returns whatever that reports.
 */
static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    bool ret;

    /*
     * NOTE(review): the final argument is false here (as for VMUL) but
     * true for the accumulating forms in this file; confirm its exact
     * meaning against do_vfp_3op_hp().
     */
    ret = do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
    return ret;
}
|
||||
|
||||
static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
|
||||
{
|
||||
/* VNMUL: -(fn * fm) */
|
||||
|
@ -103,15 +103,19 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
|
||||
vd=%vd_dp p=1 u=0 w=1
|
||||
|
||||
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
|
||||
# Half-precision VMLA: same pattern as VMLA_sp/VMLA_dp except bits [11:8]
# are 1001; it uses the @vfp_dnm_s field format.
VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
|
||||
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
|
||||
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
|
||||
|
||||
# Half-precision VMLS: same pattern as VMLS_sp/VMLS_dp except bits [11:8]
# are 1001; it uses the @vfp_dnm_s field format.
VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
|
||||
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
|
||||
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
|
||||
|
||||
# Half-precision VNMLS: same pattern as VNMLS_sp/VNMLS_dp except bits [11:8]
# are 1001; it uses the @vfp_dnm_s field format.
VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
|
||||
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
|
||||
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
|
||||
|
||||
# Half-precision VNMLA: same pattern as VNMLA_sp/VNMLA_dp except bits [11:8]
# are 1001; it uses the @vfp_dnm_s field format.
VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
|
||||
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
|
||||
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
|
||||
|
||||
@ -119,6 +123,7 @@ VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
|
||||
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
|
||||
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
|
||||
|
||||
# Half-precision VNMUL: same pattern as VNMUL_sp/VNMUL_dp except bits [11:8]
# are 1001; it uses the @vfp_dnm_s field format.
VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
|
||||
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
|
||||
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
|
||||
|
||||
|
@ -261,6 +261,11 @@ VFP_BINOP(minnum)
|
||||
VFP_BINOP(maxnum)
|
||||
#undef VFP_BINOP
|
||||
|
||||
/*
 * fp16 negate helper: returns float16_chs() applied to the operand.
 * No float-status pointer is taken or consulted here.
 */
dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
{
    dh_ctype_f16 negated = float16_chs(a);

    return negated;
}
|
||||
|
||||
float32 VFP_HELPER(neg, s)(float32 a)
|
||||
{
|
||||
return float32_chs(a);
|
||||
|
Loading…
Reference in New Issue
Block a user