tcg: Vary the allocation size for TCGOp

We have been allocating a worst-case number of arguments for every
TCGOp in order to support calls.  Instead, allow the size to vary.
By default, leave space for 4 args to maximize reuse of freed ops,
but allow calls to increase the number of args up to 32.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
[PMD: Split patch in two]
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20221218211832.73312-3-philmd@linaro.org>
This commit is contained in:
Richard Henderson 2022-12-18 22:18:32 +01:00
parent d44789434b
commit cb10bc63b7
4 changed files with 47 additions and 46 deletions

View File

@ -258,12 +258,12 @@ static TCGOp *rm_ops(TCGOp *op)
static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op)
{ {
unsigned nargs = ARRAY_SIZE(op->args); TCGOp *old_op = QTAILQ_NEXT(*begin_op, link);
unsigned nargs = old_op->nargs;
*begin_op = QTAILQ_NEXT(*begin_op, link); *begin_op = old_op;
tcg_debug_assert(*begin_op); op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs);
op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs); memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs);
memcpy(op->args, (*begin_op)->args, sizeof(op->args));
return op; return op;
} }

View File

@ -133,6 +133,4 @@
#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
#endif /* EXEC_HELPER_HEAD_H */ #endif /* EXEC_HELPER_HEAD_H */

View File

@ -38,20 +38,6 @@
/* XXX: make safe guess about sizes */ /* XXX: make safe guess about sizes */
#define MAX_OP_PER_INSTR 266 #define MAX_OP_PER_INSTR 266
#if HOST_LONG_BITS == 32
#define MAX_OPC_PARAM_PER_ARG 2
#else
#define MAX_OPC_PARAM_PER_ARG 1
#endif
#define MAX_OPC_PARAM_IARGS 7
#define MAX_OPC_PARAM_OARGS 1
#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
* and up to 4 + N parameters on 64-bit archs
* (N = number of input arguments + output arguments). */
#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
#define CPU_TEMP_BUF_NLONGS 128 #define CPU_TEMP_BUF_NLONGS 128
#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))
@ -493,34 +479,34 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet; } TCGTempSet;
/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, /*
this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. * With 1 128-bit output, a 32-bit host requires 4 output parameters,
There are never more than 2 outputs, which means that we can store all * which leaves a maximum of 28 other slots. Which is enough for 7
dead + sync data within 16 bits. */ * 128-bit operands.
#define DEAD_ARG 4 */
#define SYNC_ARG 1 #define DEAD_ARG (1 << 4)
typedef uint16_t TCGLifeData; #define SYNC_ARG (1 << 0)
typedef uint32_t TCGLifeData;
/* The layout here is designed to avoid a bitfield crossing of
a 32-bit boundary, which would cause GCC to add extra padding. */
typedef struct TCGOp { typedef struct TCGOp {
TCGOpcode opc : 8; /* 8 */ TCGOpcode opc : 8;
unsigned nargs : 8;
/* Parameters for this opcode. See below. */ /* Parameters for this opcode. See below. */
unsigned param1 : 4; /* 12 */ unsigned param1 : 8;
unsigned param2 : 4; /* 16 */ unsigned param2 : 8;
/* Lifetime data of the operands. */ /* Lifetime data of the operands. */
unsigned life : 16; /* 32 */ TCGLifeData life;
/* Next and previous opcodes. */ /* Next and previous opcodes. */
QTAILQ_ENTRY(TCGOp) link; QTAILQ_ENTRY(TCGOp) link;
/* Arguments for the opcode. */
TCGArg args[MAX_OPC_PARAM];
/* Register preferences for the output(s). */ /* Register preferences for the output(s). */
TCGRegSet output_pref[2]; TCGRegSet output_pref[2];
/* Arguments for the opcode. */
TCGArg args[];
} TCGOp; } TCGOp;
#define TCGOP_CALLI(X) (X)->param1 #define TCGOP_CALLI(X) (X)->param1

View File

@ -1513,7 +1513,12 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
} }
} }
max_args = ARRAY_SIZE(op->args); /*
* A Call op needs up to 4 + 2N parameters on 32-bit archs,
* and up to 4 + N parameters on 64-bit archs
* (N = number of input arguments + output arguments).
*/
max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4;
op = tcg_emit_op(INDEX_op_call, max_args); op = tcg_emit_op(INDEX_op_call, max_args);
pi = 0; pi = 0;
@ -2298,19 +2303,31 @@ void tcg_remove_ops_after(TCGOp *op)
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{ {
TCGContext *s = tcg_ctx; TCGContext *s = tcg_ctx;
TCGOp *op; TCGOp *op = NULL;
assert(nargs < ARRAY_SIZE(op->args)); if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
if (likely(QTAILQ_EMPTY(&s->free_ops))) { QTAILQ_FOREACH(op, &s->free_ops, link) {
op = tcg_malloc(sizeof(TCGOp)); if (nargs <= op->nargs) {
} else {
op = QTAILQ_FIRST(&s->free_ops);
QTAILQ_REMOVE(&s->free_ops, op, link); QTAILQ_REMOVE(&s->free_ops, op, link);
nargs = op->nargs;
goto found;
} }
}
}
/* Most opcodes have 3 or 4 operands: reduce fragmentation. */
nargs = MAX(4, nargs);
op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
found:
memset(op, 0, offsetof(TCGOp, link)); memset(op, 0, offsetof(TCGOp, link));
op->opc = opc; op->opc = opc;
s->nb_ops++; op->nargs = nargs;
/* Check for bitfield overflow. */
tcg_debug_assert(op->nargs == nargs);
s->nb_ops++;
return op; return op;
} }