diff --git a/tcg/optimize.c b/tcg/optimize.c index e2ecad2884..f723deaafe 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -24,6 +24,7 @@ */ #include "qemu/osdep.h" +#include "qemu/int128.h" #include "tcg/tcg-op.h" #include "tcg-internal.h" @@ -838,37 +839,59 @@ static bool fold_add(OptContext *ctx, TCGOp *op) return false; } -static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add) +static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) { if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) { - uint32_t al = arg_info(op->args[2])->val; - uint32_t ah = arg_info(op->args[3])->val; - uint32_t bl = arg_info(op->args[4])->val; - uint32_t bh = arg_info(op->args[5])->val; - uint64_t a = ((uint64_t)ah << 32) | al; - uint64_t b = ((uint64_t)bh << 32) | bl; + uint64_t al = arg_info(op->args[2])->val; + uint64_t ah = arg_info(op->args[3])->val; + uint64_t bl = arg_info(op->args[4])->val; + uint64_t bh = arg_info(op->args[5])->val; TCGArg rl, rh; - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32); + TCGOp *op2; - if (add) { - a += b; + if (ctx->type == TCG_TYPE_I32) { + uint64_t a = deposit64(al, 32, 32, ah); + uint64_t b = deposit64(bl, 32, 32, bh); + + if (add) { + a += b; + } else { + a -= b; + } + + al = sextract64(a, 0, 32); + ah = sextract64(a, 32, 32); } else { - a -= b; + Int128 a = int128_make128(al, ah); + Int128 b = int128_make128(bl, bh); + + if (add) { + a = int128_add(a, b); + } else { + a = int128_sub(a, b); + } + + al = int128_getlo(a); + ah = int128_gethi(a); } rl = op->args[0]; rh = op->args[1]; - tcg_opt_gen_movi(ctx, op, rl, (int32_t)a); - tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32)); + + /* The proper opcode is supplied by tcg_opt_gen_mov. */ + op2 = tcg_op_insert_before(ctx->tcg, op, 0); + + tcg_opt_gen_movi(ctx, op, rl, al); + tcg_opt_gen_movi(ctx, op2, rh, ah); return true; } return false; } -static bool fold_add2_i32(OptContext *ctx, TCGOp *op) +static bool fold_add2(OptContext *ctx, TCGOp *op) { - return fold_addsub2_i32(ctx, op, true); + return fold_addsub2(ctx, op, true); } static bool fold_and(OptContext *ctx, TCGOp *op) @@ -1725,9 +1748,9 @@ static bool fold_sub(OptContext *ctx, TCGOp *op) return false; } -static bool fold_sub2_i32(OptContext *ctx, TCGOp *op) +static bool fold_sub2(OptContext *ctx, TCGOp *op) { - return fold_addsub2_i32(ctx, op, false); + return fold_addsub2(ctx, op, false); } static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) @@ -1873,8 +1896,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64_VEC(add): done = fold_add(&ctx, op); break; - case INDEX_op_add2_i32: - done = fold_add2_i32(&ctx, op); + CASE_OP_32_64(add2): + done = fold_add2(&ctx, op); break; CASE_OP_32_64_VEC(and): done = fold_and(&ctx, op); @@ -2011,8 +2034,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64_VEC(sub): done = fold_sub(&ctx, op); break; - case INDEX_op_sub2_i32: - done = fold_sub2_i32(&ctx, op); + CASE_OP_32_64(sub2): + done = fold_sub2(&ctx, op); break; CASE_OP_32_64_VEC(xor): done = fold_xor(&ctx, op);