From eb79951ab638ba84ef424a8f7c0929cd4a5ea53d Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Thu, 8 Jun 2023 13:42:08 +0300 Subject: [PATCH] target/mips/mxu: Add Q8ADDE Q8ACCE D8SUM D8SUMC instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These instructions are all dual 8-bit addition/subtraction in various combinations. Most instructions are grouped in pool14, see the opcode organization in the file. Signed-off-by: Siarhei Volkau Message-Id: <20230608104222.1520143-20-lis8215@gmail.com> Signed-off-by: Philippe Mathieu-Daudé --- target/mips/tcg/mxu_translate.c | 200 ++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c index 18188208b7..5556fd152d 100644 --- a/target/mips/tcg/mxu_translate.c +++ b/target/mips/tcg/mxu_translate.c @@ -379,6 +379,8 @@ enum { OPC_MXU_D32ADD = 0x18, OPC_MXU__POOL12 = 0x19, OPC_MXU__POOL13 = 0x1B, + OPC_MXU__POOL14 = 0x1C, + OPC_MXU_Q8ACCE = 0x1D, OPC_MXU_S8LDD = 0x22, OPC_MXU__POOL16 = 0x27, OPC_MXU__POOL17 = 0x28, @@ -459,6 +461,15 @@ enum { OPC_MXU_D16ASUM = 0x02, }; +/* + * MXU pool 14 + */ +enum { + OPC_MXU_Q8ADDE = 0x00, + OPC_MXU_D8SUM = 0x01, + OPC_MXU_D8SUMC = 0x02, +}; + /* * MXU pool 16 */ @@ -2183,6 +2194,168 @@ static void gen_mxu_Q8ADD(DisasContext *ctx) } } +/* + * Q8ADDE XRa, XRb, XRc, XRd, aptn2 + * Add/subtract quadruple of 8-bit packed in XRb + * to another one in XRc, with zero extending + * to 16-bit and put results as packed 16-bit data + * into XRa and XRd. + * aptn2 manages action add or subract of pairs of data. + * + * Q8ACCE XRa, XRb, XRc, XRd, aptn2 + * Add/subtract quadruple of 8-bit packed in XRb + * to another one in XRc, with zero extending + * to 16-bit and accumulate results as packed 16-bit data + * into XRa and XRd. + * aptn2 manages action add or subract of pairs of data. + */ +static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate) +{ + uint32_t aptn2, XRd, XRc, XRb, XRa; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + if (XRa != 0) { + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } + if (XRd != 0) { + tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0); + } + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + if (XRa != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); + if (aptn2 & 2) { + tcg_gen_sub_tl(t0, t0, t1); + tcg_gen_sub_tl(t2, t2, t3); + } else { + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + if (accumulate) { + gen_load_mxu_gpr(t5, XRa); + tcg_gen_extract_tl(t1, t5, 0, 16); + tcg_gen_extract_tl(t3, t5, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + tcg_gen_shli_tl(t2, t2, 16); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_or_tl(t4, t2, t0); + } + if (XRd != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8); + if (aptn2 & 1) { + tcg_gen_sub_tl(t0, t0, t1); + tcg_gen_sub_tl(t2, t2, t3); + } else { + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + if (accumulate) { + gen_load_mxu_gpr(t5, XRd); + tcg_gen_extract_tl(t1, t5, 0, 16); + tcg_gen_extract_tl(t3, t5, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + tcg_gen_shli_tl(t2, t2, 16); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_or_tl(t5, t2, t0); + } + + gen_store_mxu_gpr(t4, XRa); + gen_store_mxu_gpr(t5, XRd); + } +} + +/* + * D8SUM XRa, XRb, XRc + * Double parallel add of quadruple unsigned 8-bit together + * with zero extending to 16-bit data. + * D8SUMC XRa, XRb, XRc + * Double parallel add of quadruple unsigned 8-bit together + * with zero extending to 16-bit data and adding 2 to each + * parallel result. + */ +static void gen_mxu_d8sum(DisasContext *ctx, bool sumc) +{ + uint32_t pad, pad2, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 24, 2); + pad2 = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0 || pad2 != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + if (XRb != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8); + tcg_gen_add_tl(t4, t0, t1); + tcg_gen_add_tl(t4, t4, t2); + tcg_gen_add_tl(t4, t4, t3); + } else { + tcg_gen_mov_tl(t4, 0); + } + if (XRc != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); + tcg_gen_add_tl(t5, t0, t1); + tcg_gen_add_tl(t5, t5, t2); + tcg_gen_add_tl(t5, t5, t3); + } else { + tcg_gen_mov_tl(t5, 0); + } + + if (sumc) { + tcg_gen_addi_tl(t4, t4, 2); + tcg_gen_addi_tl(t5, t5, 2); + } + tcg_gen_shli_tl(t4, t4, 16); + + tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5); + } +} + /* * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed * 16-bit pattern addition. @@ -3335,6 +3508,27 @@ static void decode_opc_mxu__pool13(DisasContext *ctx) } } +static void decode_opc_mxu__pool14(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q8ADDE: + gen_mxu_q8adde(ctx, false); + break; + case OPC_MXU_D8SUM: + gen_mxu_d8sum(ctx, false); + break; + case OPC_MXU_D8SUMC: + gen_mxu_d8sum(ctx, true); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + static void decode_opc_mxu__pool16(DisasContext *ctx) { uint32_t opcode = extract32(ctx->opcode, 18, 3); @@ -3506,6 +3700,12 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn) case OPC_MXU__POOL13: decode_opc_mxu__pool13(ctx); break; + case OPC_MXU__POOL14: + decode_opc_mxu__pool14(ctx); + break; + case OPC_MXU_Q8ACCE: + gen_mxu_q8adde(ctx, true); + break; case OPC_MXU_S8LDD: gen_mxu_s8ldd(ctx); break;