target/mips/mxu: Add S32MUL S32MULU S32EXTR S32EXTRV insns

These instructions are part of pool15, see the grand opcode organization
tree on top of the file.

Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Message-Id: <20230608104222.1520143-23-lis8215@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
This commit is contained in:
Siarhei Volkau 2023-06-08 13:42:11 +03:00 committed by Philippe Mathieu-Daudé
parent 968045b6c7
commit 29059e72b0
1 changed files with 196 additions and 4 deletions

View File

@ -290,9 +290,9 @@
* 100010 OPC_MXU_S8LDD
* 100011 OPC_MXU_S8STD 15..14
* 100100 OPC_MXU_S8LDI 00 OPC_MXU_S32MUL
* 100101 OPC_MXU_S8SDI 00 OPC_MXU_S32MULU
* 00 OPC_MXU_S32EXTR
* 100110 OPC_MXU__POOL15 00 OPC_MXU_S32EXTRV
* 100101 OPC_MXU_S8SDI 01 OPC_MXU_S32MULU
* 10 OPC_MXU_S32EXTR
* 100110 OPC_MXU__POOL15 11 OPC_MXU_S32EXTRV
*
* 20..18
* 100111 OPC_MXU__POOL16 000 OPC_MXU_D32SARW
@ -385,6 +385,7 @@ enum {
OPC_MXU_S8STD = 0x23,
OPC_MXU_S8LDI = 0x24,
OPC_MXU_S8SDI = 0x25,
OPC_MXU__POOL15 = 0x26,
OPC_MXU__POOL16 = 0x27,
OPC_MXU__POOL17 = 0x28,
OPC_MXU_S16LDD = 0x2A,
@ -477,6 +478,16 @@ enum {
OPC_MXU_D8SUMC = 0x02,
};
/*
* MXU pool 15
*/
enum {
OPC_MXU_S32MUL = 0x00,
OPC_MXU_S32MULU = 0x01,
OPC_MXU_S32EXTR = 0x02,
OPC_MXU_S32EXTRV = 0x03,
};
/*
* MXU pool 16
*/
@ -871,6 +882,47 @@ static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
}
/*
* S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
* of GPR's and stores result into pair of MXU registers.
* It strains HI and LO registers.
*
* S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
* of GPR's and stores result into pair of MXU registers.
* It strains HI and LO registers.
*/
static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
{
TCGv t0, t1;
uint32_t XRa, XRd, rs, rt;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
rs = extract32(ctx->opcode, 16, 5);
rt = extract32(ctx->opcode, 21, 5);
if (unlikely(rs == 0 || rt == 0)) {
tcg_gen_movi_tl(t0, 0);
tcg_gen_movi_tl(t1, 0);
} else {
gen_load_gpr(t0, rs);
gen_load_gpr(t1, rt);
if (mulu) {
tcg_gen_mulu2_tl(t0, t1, t0, t1);
} else {
tcg_gen_muls2_tl(t0, t1, t0, t1);
}
}
tcg_gen_mov_tl(cpu_HI[0], t1);
tcg_gen_mov_tl(cpu_LO[0], t0);
gen_store_mxu_gpr(t1, XRa);
gen_store_mxu_gpr(t0, XRd);
}
/*
* D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
* D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
@ -3014,9 +3066,122 @@ static void gen_mxu_d32asum(DisasContext *ctx)
* MXU instruction category: Miscellaneous
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Q16SAT
* S32EXTR
* S32EXTRV
* Q16SAT
*/
/*
* S32EXTR XRa, XRd, rs, bits5
* Extract bits5 bits from 64-bit pair {XRa:XRd}
* starting from rs[4:0] offset and put to the XRa.
*/
static void gen_mxu_s32extr(DisasContext *ctx)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRd, rs, bits5;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
bits5 = extract32(ctx->opcode, 16, 5);
rs = extract32(ctx->opcode, 21, 5);
/* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
/* {XRa} = extract({tmp}, 0, bits5); */
if (bits5 > 0) {
TCGLabel *l_xra_only = gen_new_label();
TCGLabel *l_done = gen_new_label();
gen_load_mxu_gpr(t0, XRd);
gen_load_mxu_gpr(t1, XRa);
gen_load_gpr(t2, rs);
tcg_gen_andi_tl(t2, t2, 0x1f);
tcg_gen_subfi_tl(t2, 32, t2);
tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
tcg_gen_subfi_tl(t2, bits5, t2);
tcg_gen_subfi_tl(t3, 32, t2);
tcg_gen_shr_tl(t0, t0, t3);
tcg_gen_shl_tl(t1, t1, t2);
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_br(l_done);
gen_set_label(l_xra_only);
tcg_gen_subi_tl(t2, t2, bits5);
tcg_gen_shr_tl(t0, t1, t2);
gen_set_label(l_done);
tcg_gen_extract_tl(t0, t0, 0, bits5);
} else {
/* unspecified behavior but matches tests on real hardware*/
tcg_gen_movi_tl(t0, 0);
}
gen_store_mxu_gpr(t0, XRa);
}
/*
* S32EXTRV XRa, XRd, rs, rt
* Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
* starting from rs[4:0] offset and put to the XRa.
*/
static void gen_mxu_s32extrv(DisasContext *ctx)
{
TCGv t0, t1, t2, t3, t4;
uint32_t XRa, XRd, rs, rt;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
t4 = tcg_temp_new();
TCGLabel *l_xra_only = gen_new_label();
TCGLabel *l_done = gen_new_label();
TCGLabel *l_zero = gen_new_label();
TCGLabel *l_extract = gen_new_label();
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
rt = extract32(ctx->opcode, 16, 5);
rs = extract32(ctx->opcode, 21, 5);
/* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
gen_load_mxu_gpr(t0, XRd);
gen_load_mxu_gpr(t1, XRa);
gen_load_gpr(t2, rs);
gen_load_gpr(t4, rt);
tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
tcg_gen_andi_tl(t2, t2, 0x1f);
tcg_gen_subfi_tl(t2, 32, t2);
tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
tcg_gen_sub_tl(t2, t4, t2);
tcg_gen_subfi_tl(t3, 32, t2);
tcg_gen_shr_tl(t0, t0, t3);
tcg_gen_shl_tl(t1, t1, t2);
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_br(l_extract);
gen_set_label(l_xra_only);
tcg_gen_sub_tl(t2, t2, t4);
tcg_gen_shr_tl(t0, t1, t2);
tcg_gen_br(l_extract);
/* unspecified behavior but matches tests on real hardware*/
gen_set_label(l_zero);
tcg_gen_movi_tl(t0, 0);
tcg_gen_br(l_done);
/* {XRa} = extract({tmp}, 0, rt) */
gen_set_label(l_extract);
tcg_gen_subfi_tl(t4, 32, t4);
tcg_gen_shl_tl(t0, t0, t4);
tcg_gen_shr_tl(t0, t0, t4);
gen_set_label(l_done);
gen_store_mxu_gpr(t0, XRa);
}
/*
* Q16SAT XRa, XRb, XRc
* Packs four 16-bit signed integers in XRb and XRc to
@ -3695,6 +3860,30 @@ static void decode_opc_mxu__pool14(DisasContext *ctx)
}
}
static void decode_opc_mxu__pool15(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 14, 2);
switch (opcode) {
case OPC_MXU_S32MUL:
gen_mxu_s32mul(ctx, false);
break;
case OPC_MXU_S32MULU:
gen_mxu_s32mul(ctx, true);
break;
case OPC_MXU_S32EXTR:
gen_mxu_s32extr(ctx);
break;
case OPC_MXU_S32EXTRV:
gen_mxu_s32extrv(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool16(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
@ -3884,6 +4073,9 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
case OPC_MXU_S8SDI:
gen_mxu_s8std(ctx, true);
break;
case OPC_MXU__POOL15:
decode_opc_mxu__pool15(ctx);
break;
case OPC_MXU__POOL16:
decode_opc_mxu__pool16(ctx);
break;