tcg/optimize: Split out fold_mb, fold_qemu_{ld,st}
This puts the separate mb optimization into the same framework as the
others. While fold_qemu_{ld,st} are currently identical, that won't
last as more code gets moved.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
parent 404a148d89
commit 3eefdf2b58
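For context, the "framework" the commit message refers to is the dispatch pattern visible in the hunks below: tcg_optimize switches on the opcode, calls a small fold_* helper, and only runs the generic constant/copy folding when the helper returns false. The following is a minimal standalone sketch of that shape; it is not part of the patch, and the OptCtx/Op types, opcode names and flag values are made up for illustration rather than being the real TCG definitions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for TCG's OptContext and TCGOp; not the real types. */
typedef struct { unsigned prev_mb_arg; bool have_prev_mb; } OptCtx;
typedef struct { int opc; unsigned arg0; } Op;

enum { OP_NOP, OP_MB, OP_QEMU_LD, OP_QEMU_ST, OP_ADD };

/* A fold helper returns true when it has fully handled (or removed) the op,
 * and false to fall through to the generic constant/copy folding pass. */
static bool fold_mb(OptCtx *ctx, Op *op)
{
    if (ctx->have_prev_mb) {
        ctx->prev_mb_arg |= op->arg0;  /* merge into the earlier barrier */
        op->opc = OP_NOP;              /* "remove" the redundant barrier */
    } else {
        ctx->have_prev_mb = true;
        ctx->prev_mb_arg = op->arg0;
    }
    return true;
}

static bool fold_qemu_memop(OptCtx *ctx, Op *op)
{
    (void)op;
    ctx->have_prev_mb = false;  /* a guest memory access ends the mb window */
    return false;               /* generic folding still applies */
}

int main(void)
{
    Op ops[] = { {OP_MB, 0x1}, {OP_MB, 0x8}, {OP_QEMU_LD, 0}, {OP_ADD, 0} };
    OptCtx ctx = { 0, false };

    for (int i = 0; i < 4; i++) {
        bool done = false;
        switch (ops[i].opc) {
        case OP_MB:
            done = fold_mb(&ctx, &ops[i]);
            break;
        case OP_QEMU_LD:
        case OP_QEMU_ST:
            done = fold_qemu_memop(&ctx, &ops[i]);
            break;
        default:
            break;
        }
        if (!done) {
            /* the generic constant/copy folding would run here */
        }
    }
    printf("merged barrier mask: %#x\n", ctx.prev_mb_arg); /* prints 0x9 */
    return 0;
}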
@@ -692,6 +692,44 @@ static bool fold_call(OptContext *ctx, TCGOp *op)
     return true;
 }
 
+static bool fold_mb(OptContext *ctx, TCGOp *op)
+{
+    /* Eliminate duplicate and redundant fence instructions.  */
+    if (ctx->prev_mb) {
+        /*
+         * Merge two barriers of the same type into one,
+         * or a weaker barrier into a stronger one,
+         * or two weaker barriers into a stronger one.
+         *   mb X; mb Y => mb X|Y
+         *   mb; strl => mb; st
+         *   ldaq; mb => ld; mb
+         *   ldaq; strl => ld; mb; st
+         * Other combinations are also merged into a strong
+         * barrier.  This is stricter than specified but for
+         * the purposes of TCG is better than not optimizing.
+         */
+        ctx->prev_mb->args[0] |= op->args[0];
+        tcg_op_remove(ctx->tcg, op);
+    } else {
+        ctx->prev_mb = op;
+    }
+    return true;
+}
+
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization.  */
+    ctx->prev_mb = NULL;
+    return false;
+}
+
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization.  */
+    ctx->prev_mb = NULL;
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions.  */
 void tcg_optimize(TCGContext *s)
 {
@@ -1599,6 +1637,19 @@ void tcg_optimize(TCGContext *s)
             }
             break;
 
+        case INDEX_op_mb:
+            done = fold_mb(&ctx, op);
+            break;
+        case INDEX_op_qemu_ld_i32:
+        case INDEX_op_qemu_ld_i64:
+            done = fold_qemu_ld(&ctx, op);
+            break;
+        case INDEX_op_qemu_st_i32:
+        case INDEX_op_qemu_st8_i32:
+        case INDEX_op_qemu_st_i64:
+            done = fold_qemu_st(&ctx, op);
+            break;
+
         default:
             break;
         }
@@ -1606,43 +1657,5 @@ void tcg_optimize(TCGContext *s)
         if (!done) {
             finish_folding(&ctx, op);
         }
-
-        /* Eliminate duplicate and redundant fence instructions.  */
-        if (ctx.prev_mb) {
-            switch (opc) {
-            case INDEX_op_mb:
-                /* Merge two barriers of the same type into one,
-                 * or a weaker barrier into a stronger one,
-                 * or two weaker barriers into a stronger one.
-                 *   mb X; mb Y => mb X|Y
-                 *   mb; strl => mb; st
-                 *   ldaq; mb => ld; mb
-                 *   ldaq; strl => ld; mb; st
-                 * Other combinations are also merged into a strong
-                 * barrier.  This is stricter than specified but for
-                 * the purposes of TCG is better than not optimizing.
-                 */
-                ctx.prev_mb->args[0] |= op->args[0];
-                tcg_op_remove(s, op);
-                break;
-
-            default:
-                /* Opcodes that end the block stop the optimization.  */
-                if ((def->flags & TCG_OPF_BB_END) == 0) {
-                    break;
-                }
-                /* fallthru */
-            case INDEX_op_qemu_ld_i32:
-            case INDEX_op_qemu_ld_i64:
-            case INDEX_op_qemu_st_i32:
-            case INDEX_op_qemu_st8_i32:
-            case INDEX_op_qemu_st_i64:
-                /* Opcodes that touch guest memory stop the optimization.  */
-                ctx.prev_mb = NULL;
-                break;
-            }
-        } else if (opc == INDEX_op_mb) {
-            ctx.prev_mb = op;
-        }
     }
 }
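The merge rule documented in fold_mb above works because the single argument of a TCG mb op is a bitmask: which load/store orderings the fence enforces plus how strong the barrier is (TCG's TCG_MO_* and TCG_BAR_* flags), so OR-ing two masks always yields a fence at least as strong as either input. Below is a small standalone illustration, not part of the patch, using locally defined flag values that merely mirror that encoding (the real flags live in TCG's tcg-mo.h).

#include <stdio.h>

/* Locally defined stand-ins that mirror TCG's TCG_MO_* and TCG_BAR_* encoding;
 * the values here are for illustration only (the real ones live in tcg-mo.h). */
enum {
    MO_LD_LD = 0x01,  /* order earlier loads against later loads   */
    MO_ST_LD = 0x02,  /* order earlier stores against later loads  */
    MO_LD_ST = 0x04,  /* order earlier loads against later stores  */
    MO_ST_ST = 0x08,  /* order earlier stores against later stores */
    BAR_LDAQ = 0x10,  /* load-acquire style barrier  */
    BAR_STRL = 0x20,  /* store-release style barrier */
    BAR_SC   = 0x30,  /* full sequentially-consistent barrier */
};

int main(void)
{
    /* "ldaq; strl => ld; mb; st": an acquire-style fence followed by a
     * release-style fence merges, by OR-ing the two masks, into a single
     * barrier that covers both orderings and is at least as strong as
     * either input. */
    unsigned ldaq = BAR_LDAQ | MO_LD_LD | MO_LD_ST;
    unsigned strl = BAR_STRL | MO_LD_ST | MO_ST_ST;
    unsigned merged = ldaq | strl;  /* what ctx->prev_mb->args[0] |= op->args[0] does in fold_mb */

    printf("ldaq   = %#x\n", ldaq);     /* 0x15 */
    printf("strl   = %#x\n", strl);     /* 0x2c */
    printf("merged = %#x\n", merged);   /* 0x3d: both orderings, SC-strength type bits */
    return 0;
}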