diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 615bed9096..bb45ea0fcf 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -247,6 +247,141 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out_opc_dbar(s, 0); } +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret == arg) { + return true; + } + switch (type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + /* + * Conventional register-register move used in LoongArch is + * `or dst, src, zero`. + */ + tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); + break; + default: + g_assert_not_reached(); + } + return true; +} + +static bool imm_part_needs_loading(bool high_bits_are_ones, + tcg_target_long part) +{ + if (high_bits_are_ones) { + return part != -1; + } else { + return part != 0; + } +} + +/* Loads a 32-bit immediate into rd, sign-extended. */ +static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) +{ + tcg_target_long lo = sextreg(val, 0, 12); + tcg_target_long hi12 = sextreg(val, 12, 20); + + /* Single-instruction cases. */ + if (lo == val) { + /* val fits in simm12: addi.w rd, zero, val */ + tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); + return; + } + if (0x800 <= val && val <= 0xfff) { + /* val fits in uimm12: ori rd, zero, val */ + tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); + return; + } + + /* High bits must be set; load with lu12i.w + optional ori. */ + tcg_out_opc_lu12i_w(s, rd, hi12); + if (lo != 0) { + tcg_out_opc_ori(s, rd, rd, lo & 0xfff); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long val) +{ + /* + * LoongArch conventionally loads 64-bit immediates in at most 4 steps, + * with dedicated instructions for filling the respective bitfields + * below: + * + * 6 5 4 3 + * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + * +-----------------------+---------------------------------------+... + * | hi52 | hi32 | + * +-----------------------+---------------------------------------+... + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+-------------------------------------+-------------------------+ + * | hi12 | lo | + * ...+-------------------------------------+-------------------------+ + * + * Check if val belong to one of the several fast cases, before falling + * back to the slow path. + */ + + intptr_t pc_offset; + tcg_target_long val_lo, val_hi, pc_hi, offset_hi; + tcg_target_long hi32, hi52; + bool rd_high_bits_are_ones; + + /* Value fits in signed i32. */ + if (type == TCG_TYPE_I32 || val == (int32_t)val) { + tcg_out_movi_i32(s, rd, val); + return; + } + + /* PC-relative cases. */ + pc_offset = tcg_pcrel_diff(s, (void *)val); + if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { + /* Single pcaddu2i. */ + tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); + return; + } + + if (pc_offset == (int32_t)pc_offset) { + /* Offset within 32 bits; load with pcalau12i + ori. */ + val_lo = sextreg(val, 0, 12); + val_hi = val >> 12; + pc_hi = (val - pc_offset) >> 12; + offset_hi = val_hi - pc_hi; + + tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); + tcg_out_opc_pcalau12i(s, rd, offset_hi); + if (val_lo != 0) { + tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); + } + return; + } + + hi32 = sextreg(val, 32, 20); + hi52 = sextreg(val, 52, 12); + + /* Single cu52i.d case. */ + if (ctz64(val) >= 52) { + tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); + return; + } + + /* Slow path. Initialize the low 32 bits, then concat high bits. */ + tcg_out_movi_i32(s, rd, val); + rd_high_bits_are_ones = (int32_t)val < 0; + + if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) { + tcg_out_opc_cu32i_d(s, rd, hi32); + rd_high_bits_are_ones = hi32 < 0; + } + + if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) { + tcg_out_opc_cu52i_d(s, rd, rd, hi52); + } +} + /* * Entry-points */ @@ -262,6 +397,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_mb(s, a0); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: default: g_assert_not_reached(); }