tcg/optimize: Fix folding of vector ops

Bitwise operations are easy to fold, because the operation is identical regardless of element size. But add and sub need extra element size info that is not currently propagated.

Fixes: 2f9f08ba43
Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/799
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2021-12-16 06:07:25 -08:00 · 2021-12-16 06:07:25 -08:00 · c578ff1858
commit c578ff1858
parent 67e41fe0cf
1 changed file with 38 additions and 11 deletions
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -308,13 +308,13 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
    CASE_OP_32_64(mul):
        return x * y;

-    CASE_OP_32_64(and):
+    CASE_OP_32_64_VEC(and):
        return x & y;

-    CASE_OP_32_64(or):
+    CASE_OP_32_64_VEC(or):
        return x | y;

-    CASE_OP_32_64(xor):
+    CASE_OP_32_64_VEC(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
@@ -347,16 +347,16 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

-    CASE_OP_32_64(not):
+    CASE_OP_32_64_VEC(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

-    CASE_OP_32_64(andc):
+    CASE_OP_32_64_VEC(andc):
        return x & ~y;

-    CASE_OP_32_64(orc):
+    CASE_OP_32_64_VEC(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
@@ -751,6 +751,12 @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
    return false;
 }

+static bool fold_commutative(OptContext *ctx, TCGOp *op)
+{
+    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+    return false;
+}
+
 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 {
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
@@ -905,6 +911,16 @@ static bool fold_add(OptContext *ctx, TCGOp *op)
    return false;
 }

+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_add_vec(OptContext *ctx, TCGOp *op)
+{
+    if (fold_commutative(ctx, op) ||
+        fold_xi_to_x(ctx, op, 0)) {
+        return true;
+    }
+    return false;
+}
+
 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
@@ -1938,10 +1954,10 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
    return false;
 }

-static bool fold_sub(OptContext *ctx, TCGOp *op)
+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
-        fold_xx_to_i(ctx, op, 0) ||
+    if (fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_sub_to_neg(ctx, op)) {
        return true;
@@ -1949,6 +1965,11 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
    return false;
 }

+static bool fold_sub(OptContext *ctx, TCGOp *op)
+{
+    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
+}
+
 static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
    return fold_addsub2(ctx, op, false);
@@ -2052,9 +2073,12 @@ void tcg_optimize(TCGContext *s)
         * Sorted alphabetically by opcode as much as possible.
         */
        switch (opc) {
-        CASE_OP_32_64_VEC(add):
+        CASE_OP_32_64(add):
            done = fold_add(&ctx, op);
            break;
+        case INDEX_op_add_vec:
+            done = fold_add_vec(&ctx, op);
+            break;
        CASE_OP_32_64(add2):
            done = fold_add2(&ctx, op);
            break;
@@ -2193,9 +2217,12 @@ void tcg_optimize(TCGContext *s)
        CASE_OP_32_64(sextract):
            done = fold_sextract(&ctx, op);
            break;
-        CASE_OP_32_64_VEC(sub):
+        CASE_OP_32_64(sub):
            done = fold_sub(&ctx, op);
            break;
+        case INDEX_op_sub_vec:
+            done = fold_sub_vec(&ctx, op);
+            break;
        CASE_OP_32_64(sub2):
            done = fold_sub2(&ctx, op);
            break;