qemu-e2k/target/riscv/insn_trans/trans_rvzfa.c.inc
Christoph Müllner a47842d166 riscv: Add support for the Zfa extension
This patch introduces the RISC-V Zfa extension, which introduces
additional floating-point instructions:
* fli (load-immediate) with pre-defined immediates
* fminm/fmaxm (like fmin/fmax but with different NaN behaviour)
* fround/froundmx (round to integer)
* fcvtmod.w.d (Modular Convert-to-Integer)
* fmv* to access high bits of float register bigger than XLEN
* Quiet comparison instructions (fleq/fltq)

Zfa defines its instructions in combination with the following extensions:
* single-precision floating-point (F)
* double-precision floating-point (D)
* quad-precision floating-point (Q)
* half-precision floating-point (Zfh)

Since QEMU does not support the RISC-V quad-precision floating-point
ISA extension (Q), this patch does not include the instructions that
depend on this extension. All other instructions are included in this
patch.

The Zfa specification can be found here:
  https://github.com/riscv/riscv-isa-manual/blob/master/src/zfa.tex
The Zfa specifciation is frozen and is in public review since May 3, 2023:
  https://groups.google.com/a/groups.riscv.org/g/isa-dev/c/SED4ntBkabg

The patch also includes a TCG test for the fcvtmod.w.d instruction.
The test cases test for correct results and flag behaviour.
Note, that the Zfa specification requires fcvtmod's flag behaviour
to be identical to a fcvt with the same operands (which is also
tested).

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
Message-Id: <20230710071243.282464-1-christoph.muellner@vrull.eu>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
2023-07-10 22:29:20 +10:00

522 lines
14 KiB
C++

/*
* RISC-V translation routines for the Zfa Standard Extension.
*
* Copyright (c) 2023 Christoph Müllner, christoph.muellner@vrull.eu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2 or later, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define REQUIRE_ZFA(ctx) do { \
if (!ctx->cfg_ptr->ext_zfa) { \
return false; \
} \
} while (0)
#define REQUIRE_ZFH(ctx) do { \
if (!ctx->cfg_ptr->ext_zfh) { \
return false; \
} \
} while (0)
static bool trans_fli_s(DisasContext *ctx, arg_fli_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
/* Values below are NaN-boxed to avoid a gen_nanbox_s(). */
static const uint64_t fli_s_table[] = {
0xffffffffbf800000, /* -1.0 */
0xffffffff00800000, /* minimum positive normal */
0xffffffff37800000, /* 1.0 * 2^-16 */
0xffffffff38000000, /* 1.0 * 2^-15 */
0xffffffff3b800000, /* 1.0 * 2^-8 */
0xffffffff3c000000, /* 1.0 * 2^-7 */
0xffffffff3d800000, /* 1.0 * 2^-4 */
0xffffffff3e000000, /* 1.0 * 2^-3 */
0xffffffff3e800000, /* 0.25 */
0xffffffff3ea00000, /* 0.3125 */
0xffffffff3ec00000, /* 0.375 */
0xffffffff3ee00000, /* 0.4375 */
0xffffffff3f000000, /* 0.5 */
0xffffffff3f200000, /* 0.625 */
0xffffffff3f400000, /* 0.75 */
0xffffffff3f600000, /* 0.875 */
0xffffffff3f800000, /* 1.0 */
0xffffffff3fa00000, /* 1.25 */
0xffffffff3fc00000, /* 1.5 */
0xffffffff3fe00000, /* 1.75 */
0xffffffff40000000, /* 2.0 */
0xffffffff40200000, /* 2.5 */
0xffffffff40400000, /* 3 */
0xffffffff40800000, /* 4 */
0xffffffff41000000, /* 8 */
0xffffffff41800000, /* 16 */
0xffffffff43000000, /* 2^7 */
0xffffffff43800000, /* 2^8 */
0xffffffff47000000, /* 2^15 */
0xffffffff47800000, /* 2^16 */
0xffffffff7f800000, /* +inf */
0xffffffff7fc00000, /* Canonical NaN */
};
TCGv_i64 dest = dest_fpr(ctx, a->rd);
tcg_gen_movi_i64(dest, fli_s_table[a->rs1]);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fli_d(DisasContext *ctx, arg_fli_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
static const uint64_t fli_d_table[] = {
0xbff0000000000000, /* -1.0 */
0x0010000000000000, /* minimum positive normal */
0x3ef0000000000000, /* 1.0 * 2^-16 */
0x3f00000000000000, /* 1.0 * 2^-15 */
0x3f70000000000000, /* 1.0 * 2^-8 */
0x3f80000000000000, /* 1.0 * 2^-7 */
0x3fb0000000000000, /* 1.0 * 2^-4 */
0x3fc0000000000000, /* 1.0 * 2^-3 */
0x3fd0000000000000, /* 0.25 */
0x3fd4000000000000, /* 0.3125 */
0x3fd8000000000000, /* 0.375 */
0x3fdc000000000000, /* 0.4375 */
0x3fe0000000000000, /* 0.5 */
0x3fe4000000000000, /* 0.625 */
0x3fe8000000000000, /* 0.75 */
0x3fec000000000000, /* 0.875 */
0x3ff0000000000000, /* 1.0 */
0x3ff4000000000000, /* 1.25 */
0x3ff8000000000000, /* 1.5 */
0x3ffc000000000000, /* 1.75 */
0x4000000000000000, /* 2.0 */
0x4004000000000000, /* 2.5 */
0x4008000000000000, /* 3 */
0x4010000000000000, /* 4 */
0x4020000000000000, /* 8 */
0x4030000000000000, /* 16 */
0x4060000000000000, /* 2^7 */
0x4070000000000000, /* 2^8 */
0x40e0000000000000, /* 2^15 */
0x40f0000000000000, /* 2^16 */
0x7ff0000000000000, /* +inf */
0x7ff8000000000000, /* Canonical NaN */
};
TCGv_i64 dest = dest_fpr(ctx, a->rd);
tcg_gen_movi_i64(dest, fli_d_table[a->rs1]);
gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fli_h(DisasContext *ctx, arg_fli_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
/* Values below are NaN-boxed to avoid a gen_nanbox_h(). */
static const uint64_t fli_h_table[] = {
0xffffffffffffbc00, /* -1.0 */
0xffffffffffff0400, /* minimum positive normal */
0xffffffffffff0100, /* 1.0 * 2^-16 */
0xffffffffffff0200, /* 1.0 * 2^-15 */
0xffffffffffff1c00, /* 1.0 * 2^-8 */
0xffffffffffff2000, /* 1.0 * 2^-7 */
0xffffffffffff2c00, /* 1.0 * 2^-4 */
0xffffffffffff3000, /* 1.0 * 2^-3 */
0xffffffffffff3400, /* 0.25 */
0xffffffffffff3500, /* 0.3125 */
0xffffffffffff3600, /* 0.375 */
0xffffffffffff3700, /* 0.4375 */
0xffffffffffff3800, /* 0.5 */
0xffffffffffff3900, /* 0.625 */
0xffffffffffff3a00, /* 0.75 */
0xffffffffffff3b00, /* 0.875 */
0xffffffffffff3c00, /* 1.0 */
0xffffffffffff3d00, /* 1.25 */
0xffffffffffff3e00, /* 1.5 */
0xffffffffffff3f00, /* 1.75 */
0xffffffffffff4000, /* 2.0 */
0xffffffffffff4100, /* 2.5 */
0xffffffffffff4200, /* 3 */
0xffffffffffff4400, /* 4 */
0xffffffffffff4800, /* 8 */
0xffffffffffff4c00, /* 16 */
0xffffffffffff5800, /* 2^7 */
0xffffffffffff5c00, /* 2^8 */
0xffffffffffff7800, /* 2^15 */
0xffffffffffff7c00, /* 2^16 */
0xffffffffffff7c00, /* +inf */
0xffffffffffff7e00, /* Canonical NaN */
};
TCGv_i64 dest = dest_fpr(ctx, a->rd);
tcg_gen_movi_i64(dest, fli_h_table[a->rs1]);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fminm_s(DisasContext *ctx, arg_fminm_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fminm_s(dest, cpu_env, src1, src2);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fmaxm_s(DisasContext *ctx, arg_fmaxm_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fmaxm_s(dest, cpu_env, src1, src2);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fminm_d(DisasContext *ctx, arg_fminm_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
gen_helper_fminm_d(dest, cpu_env, src1, src2);
gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fmaxm_d(DisasContext *ctx, arg_fmaxm_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
gen_helper_fmaxm_d(dest, cpu_env, src1, src2);
gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fminm_h(DisasContext *ctx, arg_fminm_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fminm_h(dest, cpu_env, src1, src2);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fmaxm_h(DisasContext *ctx, arg_fmaxm_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fmaxm_h(dest, cpu_env, src1, src2);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fround_s(DisasContext *ctx, arg_fround_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
gen_helper_fround_s(dest, cpu_env, src1);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_froundnx_s(DisasContext *ctx, arg_froundnx_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
gen_helper_froundnx_s(dest, cpu_env, src1);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fround_d(DisasContext *ctx, arg_fround_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
gen_helper_fround_d(dest, cpu_env, src1);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_froundnx_d(DisasContext *ctx, arg_froundnx_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
gen_helper_froundnx_d(dest, cpu_env, src1);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fround_h(DisasContext *ctx, arg_fround_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
gen_helper_fround_h(dest, cpu_env, src1);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_froundnx_h(DisasContext *ctx, arg_froundnx_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
gen_helper_froundnx_h(dest, cpu_env, src1);
gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
bool trans_fcvtmod_w_d(DisasContext *ctx, arg_fcvtmod_w_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv dst = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
TCGv_i64 t1 = tcg_temp_new_i64();
/* Rounding mode is RTZ. */
gen_set_rm(ctx, RISCV_FRM_RTZ);
gen_helper_fcvtmod_w_d(t1, cpu_env, src1);
tcg_gen_trunc_i64_tl(dst, t1);
gen_set_gpr(ctx, a->rd, dst);
return true;
}
bool trans_fmvh_x_d(DisasContext *ctx, arg_fmvh_x_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
REQUIRE_32BIT(ctx);
TCGv dst = dest_gpr(ctx, a->rd);
TCGv_i64 t1 = tcg_temp_new_i64();
tcg_gen_sari_i64(t1, cpu_fpr[a->rs1], 32);
tcg_gen_trunc_i64_tl(dst, t1);
gen_set_gpr(ctx, a->rd, dst);
return true;
}
bool trans_fmvp_d_x(DisasContext *ctx, arg_fmvp_d_x *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
REQUIRE_32BIT(ctx);
TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
tcg_gen_concat_tl_i64(cpu_fpr[a->rd], src1, src2);
mark_fs_dirty(ctx);
return true;
}
bool trans_fleq_s(DisasContext *ctx, arg_fleq_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
TCGv dest = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fleq_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
bool trans_fltq_s(DisasContext *ctx, arg_fltq_s *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVF);
TCGv dest = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fltq_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv dest = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fltq_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_EXT(ctx, RVD);
TCGv dest = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fltq_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
bool trans_fleq_h(DisasContext *ctx, arg_fleq_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fleq_h(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
bool trans_fltq_h(DisasContext *ctx, arg_fltq_h *a)
{
REQUIRE_FPU;
REQUIRE_ZFA(ctx);
REQUIRE_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_helper_fltq_h(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}