Clean up code_gen_buffer allocation.
Add tcg_remove_ops_after. Fix tcg_constant_* documentation. Improve TB chaining documentation. Fix float32_exp2. Fix arm tcg_out_op function signature. -----BEGIN PGP SIGNATURE----- iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmDGrQMdHHJpY2hhcmQu aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9kZgf+LSKbkimQKNGJNpFp xdMG2B0681tHyO7aiDHZqZf9Izeey7x9vGZRZzPfxomdN8qYT2PiklNx2yIIxNRt WdJ3e7+l7cYjAGY6HdrTqJ6ZiqOOftMzMuHWiXiHD0rMYTIjXgAfsf+H+lVRwMCR BbQBB1ttUJzDSLkM5B2rNuWEjup4shAMgijkipkqkaWrzZIAvfIkcfScZyqWFguG GoWnQxIHq7XMbveUX1Tu1JcdVZlXmuMl0LMQ8Qj5sbep1gjyYixuV6lWupp6SPX9 quRBsyVSmAa4frK67huJ4WVq4gZ2VylNvpiwjwoChYgJ8TOU73n7KGZOAl6i0iq2 ytR6Pw== =ft63 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210613' into staging Clean up code_gen_buffer allocation. Add tcg_remove_ops_after. Fix tcg_constant_* documentation. Improve TB chaining documentation. Fix float32_exp2. Fix arm tcg_out_op function signature. # gpg: Signature made Mon 14 Jun 2021 02:12:35 BST # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full] # Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F * remotes/rth-gitlab/tags/pull-tcg-20210613: (34 commits) docs/devel: Explain in more detail the TB chaining mechanisms softfloat: Fix tp init in float32_exp2 tcg/arm: Fix tcg_out_op function signature tcg: Fix documentation for tcg_constant_* vs tcg_temp_free_* tcg: Introduce tcg_remove_ops_after tcg: Move tcg_init_ctx and tcg_ctx from accel/tcg/ tcg: When allocating for !splitwx, begin with PROT_NONE tcg: Merge buffer protection and guard page protection tcg: Round the tb_size default from qemu_get_host_physmem util/osdep: Add qemu_mprotect_rw tcg: Sink qemu_madvise call to common code tcg: Return the map protection from alloc_code_gen_buffer tcg: Allocate code_gen_buffer into struct tcg_region_state tcg: Move 
in_code_gen_buffer and tests to region.c tcg: Tidy split_cross_256mb tcg: Tidy tcg_n_regions tcg: Rename region.start to region.after_prologue tcg: Replace region.end with region.total_size tcg: Move MAX_CODE_GEN_BUFFER_SIZE to tcg-target.h tcg: Introduce tcg_max_ctxs ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
fbe7919ece
|
@ -16,5 +16,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
|
||||||
int cflags);
|
int cflags);
|
||||||
|
|
||||||
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
|
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
|
||||||
|
void page_init(void);
|
||||||
|
void tb_htable_init(void);
|
||||||
|
|
||||||
#endif /* ACCEL_TCG_INTERNAL_H */
|
#endif /* ACCEL_TCG_INTERNAL_H */
|
||||||
|
|
|
@ -32,6 +32,11 @@
|
||||||
#include "qemu/error-report.h"
|
#include "qemu/error-report.h"
|
||||||
#include "qemu/accel.h"
|
#include "qemu/accel.h"
|
||||||
#include "qapi/qapi-builtin-visit.h"
|
#include "qapi/qapi-builtin-visit.h"
|
||||||
|
#include "qemu/units.h"
|
||||||
|
#if !defined(CONFIG_USER_ONLY)
|
||||||
|
#include "hw/boards.h"
|
||||||
|
#endif
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
struct TCGState {
|
struct TCGState {
|
||||||
AccelState parent_obj;
|
AccelState parent_obj;
|
||||||
|
@ -105,22 +110,29 @@ static void tcg_accel_instance_init(Object *obj)
|
||||||
|
|
||||||
bool mttcg_enabled;
|
bool mttcg_enabled;
|
||||||
|
|
||||||
static int tcg_init(MachineState *ms)
|
static int tcg_init_machine(MachineState *ms)
|
||||||
{
|
{
|
||||||
TCGState *s = TCG_STATE(current_accel());
|
TCGState *s = TCG_STATE(current_accel());
|
||||||
|
#ifdef CONFIG_USER_ONLY
|
||||||
|
unsigned max_cpus = 1;
|
||||||
|
#else
|
||||||
|
unsigned max_cpus = ms->smp.max_cpus;
|
||||||
|
#endif
|
||||||
|
|
||||||
tcg_exec_init(s->tb_size * 1024 * 1024, s->splitwx_enabled);
|
tcg_allowed = true;
|
||||||
mttcg_enabled = s->mttcg_enabled;
|
mttcg_enabled = s->mttcg_enabled;
|
||||||
|
|
||||||
|
page_init();
|
||||||
|
tb_htable_init();
|
||||||
|
tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus);
|
||||||
|
|
||||||
|
#if defined(CONFIG_SOFTMMU)
|
||||||
/*
|
/*
|
||||||
* Initialize TCG regions only for softmmu.
|
* There's no guest base to take into account, so go ahead and
|
||||||
*
|
* initialize the prologue now.
|
||||||
* This needs to be done later for user mode, because the prologue
|
|
||||||
* generation needs to be delayed so that GUEST_BASE is already set.
|
|
||||||
*/
|
*/
|
||||||
#ifndef CONFIG_USER_ONLY
|
tcg_prologue_init(tcg_ctx);
|
||||||
tcg_region_init();
|
#endif
|
||||||
#endif /* !CONFIG_USER_ONLY */
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -200,7 +212,7 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data)
|
||||||
{
|
{
|
||||||
AccelClass *ac = ACCEL_CLASS(oc);
|
AccelClass *ac = ACCEL_CLASS(oc);
|
||||||
ac->name = "tcg";
|
ac->name = "tcg";
|
||||||
ac->init_machine = tcg_init;
|
ac->init_machine = tcg_init_machine;
|
||||||
ac->allowed = &tcg_allowed;
|
ac->allowed = &tcg_allowed;
|
||||||
|
|
||||||
object_class_property_add_str(oc, "thread",
|
object_class_property_add_str(oc, "thread",
|
||||||
|
|
|
@ -18,7 +18,6 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "qemu/osdep.h"
|
#include "qemu/osdep.h"
|
||||||
#include "qemu/units.h"
|
|
||||||
#include "qemu-common.h"
|
#include "qemu-common.h"
|
||||||
|
|
||||||
#define NO_CPU_IO_DEFS
|
#define NO_CPU_IO_DEFS
|
||||||
|
@ -49,7 +48,6 @@
|
||||||
#include "exec/cputlb.h"
|
#include "exec/cputlb.h"
|
||||||
#include "exec/translate-all.h"
|
#include "exec/translate-all.h"
|
||||||
#include "qemu/bitmap.h"
|
#include "qemu/bitmap.h"
|
||||||
#include "qemu/error-report.h"
|
|
||||||
#include "qemu/qemu-print.h"
|
#include "qemu/qemu-print.h"
|
||||||
#include "qemu/timer.h"
|
#include "qemu/timer.h"
|
||||||
#include "qemu/main-loop.h"
|
#include "qemu/main-loop.h"
|
||||||
|
@ -220,9 +218,6 @@ static int v_l2_levels;
|
||||||
|
|
||||||
static void *l1_map[V_L1_MAX_SIZE];
|
static void *l1_map[V_L1_MAX_SIZE];
|
||||||
|
|
||||||
/* code generation context */
|
|
||||||
TCGContext tcg_init_ctx;
|
|
||||||
__thread TCGContext *tcg_ctx;
|
|
||||||
TBContext tb_ctx;
|
TBContext tb_ctx;
|
||||||
|
|
||||||
static void page_table_config_init(void)
|
static void page_table_config_init(void)
|
||||||
|
@ -245,11 +240,6 @@ static void page_table_config_init(void)
|
||||||
assert(v_l2_levels >= 0);
|
assert(v_l2_levels >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cpu_gen_init(void)
|
|
||||||
{
|
|
||||||
tcg_context_init(&tcg_init_ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Encode VAL as a signed leb128 sequence at P.
|
/* Encode VAL as a signed leb128 sequence at P.
|
||||||
Return P incremented past the encoded value. */
|
Return P incremented past the encoded value. */
|
||||||
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
|
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
|
||||||
|
@ -415,7 +405,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void page_init(void)
|
void page_init(void)
|
||||||
{
|
{
|
||||||
page_size_init();
|
page_size_init();
|
||||||
page_table_config_init();
|
page_table_config_init();
|
||||||
|
@ -900,408 +890,6 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Minimum size of the code gen buffer. This number is randomly chosen,
|
|
||||||
but not so small that we can't have a fair number of TB's live. */
|
|
||||||
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
|
|
||||||
|
|
||||||
/* Maximum size of the code gen buffer we'd like to use. Unless otherwise
|
|
||||||
indicated, this is constrained by the range of direct branches on the
|
|
||||||
host cpu, as used by the TCG implementation of goto_tb. */
|
|
||||||
#if defined(__x86_64__)
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
|
||||||
#elif defined(__sparc__)
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
|
||||||
#elif defined(__powerpc64__)
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
|
||||||
#elif defined(__powerpc__)
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
|
|
||||||
#elif defined(__aarch64__)
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
|
||||||
#elif defined(__s390x__)
|
|
||||||
/* We have a +- 4GB range on the branches; leave some slop. */
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
|
|
||||||
#elif defined(__mips__)
|
|
||||||
/* We have a 256MB branch region, but leave room to make sure the
|
|
||||||
main executable is also within that region. */
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB)
|
|
||||||
#else
|
|
||||||
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if TCG_TARGET_REG_BITS == 32
|
|
||||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
|
|
||||||
#ifdef CONFIG_USER_ONLY
|
|
||||||
/*
|
|
||||||
* For user mode on smaller 32 bit systems we may run into trouble
|
|
||||||
* allocating big chunks of data in the right place. On these systems
|
|
||||||
* we utilise a static code generation buffer directly in the binary.
|
|
||||||
*/
|
|
||||||
#define USE_STATIC_CODE_GEN_BUFFER
|
|
||||||
#endif
|
|
||||||
#else /* TCG_TARGET_REG_BITS == 64 */
|
|
||||||
#ifdef CONFIG_USER_ONLY
|
|
||||||
/*
|
|
||||||
* As user-mode emulation typically means running multiple instances
|
|
||||||
* of the translator don't go too nuts with our default code gen
|
|
||||||
* buffer lest we make things too hard for the OS.
|
|
||||||
*/
|
|
||||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
|
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* We expect most system emulation to run one or two guests per host.
|
|
||||||
* Users running large scale system emulation may want to tweak their
|
|
||||||
* runtime setup via the tb-size control on the command line.
|
|
||||||
*/
|
|
||||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
|
|
||||||
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
|
|
||||||
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
|
|
||||||
|
|
||||||
static size_t size_code_gen_buffer(size_t tb_size)
|
|
||||||
{
|
|
||||||
/* Size the buffer. */
|
|
||||||
if (tb_size == 0) {
|
|
||||||
size_t phys_mem = qemu_get_host_physmem();
|
|
||||||
if (phys_mem == 0) {
|
|
||||||
tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
|
|
||||||
} else {
|
|
||||||
tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
|
|
||||||
tb_size = MIN_CODE_GEN_BUFFER_SIZE;
|
|
||||||
}
|
|
||||||
if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
|
|
||||||
tb_size = MAX_CODE_GEN_BUFFER_SIZE;
|
|
||||||
}
|
|
||||||
return tb_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __mips__
|
|
||||||
/* In order to use J and JAL within the code_gen_buffer, we require
|
|
||||||
that the buffer not cross a 256MB boundary. */
|
|
||||||
static inline bool cross_256mb(void *addr, size_t size)
|
|
||||||
{
|
|
||||||
return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We weren't able to allocate a buffer without crossing that boundary,
|
|
||||||
so make do with the larger portion of the buffer that doesn't cross.
|
|
||||||
Returns the new base of the buffer, and adjusts code_gen_buffer_size. */
|
|
||||||
static inline void *split_cross_256mb(void *buf1, size_t size1)
|
|
||||||
{
|
|
||||||
void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
|
|
||||||
size_t size2 = buf1 + size1 - buf2;
|
|
||||||
|
|
||||||
size1 = buf2 - buf1;
|
|
||||||
if (size1 < size2) {
|
|
||||||
size1 = size2;
|
|
||||||
buf1 = buf2;
|
|
||||||
}
|
|
||||||
|
|
||||||
tcg_ctx->code_gen_buffer_size = size1;
|
|
||||||
return buf1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef USE_STATIC_CODE_GEN_BUFFER
|
|
||||||
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
|
|
||||||
__attribute__((aligned(CODE_GEN_ALIGN)));
|
|
||||||
|
|
||||||
static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
|
|
||||||
{
|
|
||||||
void *buf, *end;
|
|
||||||
size_t size;
|
|
||||||
|
|
||||||
if (splitwx > 0) {
|
|
||||||
error_setg(errp, "jit split-wx not supported");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* page-align the beginning and end of the buffer */
|
|
||||||
buf = static_code_gen_buffer;
|
|
||||||
end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
|
|
||||||
buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
|
|
||||||
end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
|
|
||||||
|
|
||||||
size = end - buf;
|
|
||||||
|
|
||||||
/* Honor a command-line option limiting the size of the buffer. */
|
|
||||||
if (size > tb_size) {
|
|
||||||
size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
|
|
||||||
}
|
|
||||||
tcg_ctx->code_gen_buffer_size = size;
|
|
||||||
|
|
||||||
#ifdef __mips__
|
|
||||||
if (cross_256mb(buf, size)) {
|
|
||||||
buf = split_cross_256mb(buf, size);
|
|
||||||
size = tcg_ctx->code_gen_buffer_size;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (qemu_mprotect_rwx(buf, size)) {
|
|
||||||
error_setg_errno(errp, errno, "mprotect of jit buffer");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
|
|
||||||
|
|
||||||
tcg_ctx->code_gen_buffer = buf;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#elif defined(_WIN32)
|
|
||||||
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
|
|
||||||
{
|
|
||||||
void *buf;
|
|
||||||
|
|
||||||
if (splitwx > 0) {
|
|
||||||
error_setg(errp, "jit split-wx not supported");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
|
|
||||||
PAGE_EXECUTE_READWRITE);
|
|
||||||
if (buf == NULL) {
|
|
||||||
error_setg_win32(errp, GetLastError(),
|
|
||||||
"allocate %zu bytes for jit buffer", size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
tcg_ctx->code_gen_buffer = buf;
|
|
||||||
tcg_ctx->code_gen_buffer_size = size;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static bool alloc_code_gen_buffer_anon(size_t size, int prot,
|
|
||||||
int flags, Error **errp)
|
|
||||||
{
|
|
||||||
void *buf;
|
|
||||||
|
|
||||||
buf = mmap(NULL, size, prot, flags, -1, 0);
|
|
||||||
if (buf == MAP_FAILED) {
|
|
||||||
error_setg_errno(errp, errno,
|
|
||||||
"allocate %zu bytes for jit buffer", size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
tcg_ctx->code_gen_buffer_size = size;
|
|
||||||
|
|
||||||
#ifdef __mips__
|
|
||||||
if (cross_256mb(buf, size)) {
|
|
||||||
/*
|
|
||||||
* Try again, with the original still mapped, to avoid re-acquiring
|
|
||||||
* the same 256mb crossing.
|
|
||||||
*/
|
|
||||||
size_t size2;
|
|
||||||
void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
|
|
||||||
switch ((int)(buf2 != MAP_FAILED)) {
|
|
||||||
case 1:
|
|
||||||
if (!cross_256mb(buf2, size)) {
|
|
||||||
/* Success! Use the new buffer. */
|
|
||||||
munmap(buf, size);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Failure. Work with what we had. */
|
|
||||||
munmap(buf2, size);
|
|
||||||
/* fallthru */
|
|
||||||
default:
|
|
||||||
/* Split the original buffer. Free the smaller half. */
|
|
||||||
buf2 = split_cross_256mb(buf, size);
|
|
||||||
size2 = tcg_ctx->code_gen_buffer_size;
|
|
||||||
if (buf == buf2) {
|
|
||||||
munmap(buf + size2, size - size2);
|
|
||||||
} else {
|
|
||||||
munmap(buf, size - size2);
|
|
||||||
}
|
|
||||||
size = size2;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
buf = buf2;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Request large pages for the buffer. */
|
|
||||||
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
|
|
||||||
|
|
||||||
tcg_ctx->code_gen_buffer = buf;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef CONFIG_TCG_INTERPRETER
|
|
||||||
#ifdef CONFIG_POSIX
|
|
||||||
#include "qemu/memfd.h"
|
|
||||||
|
|
||||||
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
|
|
||||||
{
|
|
||||||
void *buf_rw = NULL, *buf_rx = MAP_FAILED;
|
|
||||||
int fd = -1;
|
|
||||||
|
|
||||||
#ifdef __mips__
|
|
||||||
/* Find space for the RX mapping, vs the 256MiB regions. */
|
|
||||||
if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
|
|
||||||
MAP_PRIVATE | MAP_ANONYMOUS |
|
|
||||||
MAP_NORESERVE, errp)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
/* The size of the mapping may have been adjusted. */
|
|
||||||
size = tcg_ctx->code_gen_buffer_size;
|
|
||||||
buf_rx = tcg_ctx->code_gen_buffer;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
|
|
||||||
if (buf_rw == NULL) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __mips__
|
|
||||||
void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
|
|
||||||
MAP_SHARED | MAP_FIXED, fd, 0);
|
|
||||||
if (tmp != buf_rx) {
|
|
||||||
goto fail_rx;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
|
|
||||||
if (buf_rx == MAP_FAILED) {
|
|
||||||
goto fail_rx;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
close(fd);
|
|
||||||
tcg_ctx->code_gen_buffer = buf_rw;
|
|
||||||
tcg_ctx->code_gen_buffer_size = size;
|
|
||||||
tcg_splitwx_diff = buf_rx - buf_rw;
|
|
||||||
|
|
||||||
/* Request large pages for the buffer and the splitwx. */
|
|
||||||
qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
|
|
||||||
qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
|
|
||||||
return true;
|
|
||||||
|
|
||||||
fail_rx:
|
|
||||||
error_setg_errno(errp, errno, "failed to map shared memory for execute");
|
|
||||||
fail:
|
|
||||||
if (buf_rx != MAP_FAILED) {
|
|
||||||
munmap(buf_rx, size);
|
|
||||||
}
|
|
||||||
if (buf_rw) {
|
|
||||||
munmap(buf_rw, size);
|
|
||||||
}
|
|
||||||
if (fd >= 0) {
|
|
||||||
close(fd);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_POSIX */
|
|
||||||
|
|
||||||
#ifdef CONFIG_DARWIN
|
|
||||||
#include <mach/mach.h>
|
|
||||||
|
|
||||||
extern kern_return_t mach_vm_remap(vm_map_t target_task,
|
|
||||||
mach_vm_address_t *target_address,
|
|
||||||
mach_vm_size_t size,
|
|
||||||
mach_vm_offset_t mask,
|
|
||||||
int flags,
|
|
||||||
vm_map_t src_task,
|
|
||||||
mach_vm_address_t src_address,
|
|
||||||
boolean_t copy,
|
|
||||||
vm_prot_t *cur_protection,
|
|
||||||
vm_prot_t *max_protection,
|
|
||||||
vm_inherit_t inheritance);
|
|
||||||
|
|
||||||
static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
|
|
||||||
{
|
|
||||||
kern_return_t ret;
|
|
||||||
mach_vm_address_t buf_rw, buf_rx;
|
|
||||||
vm_prot_t cur_prot, max_prot;
|
|
||||||
|
|
||||||
/* Map the read-write portion via normal anon memory. */
|
|
||||||
if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
|
|
||||||
MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
|
|
||||||
buf_rx = 0;
|
|
||||||
ret = mach_vm_remap(mach_task_self(),
|
|
||||||
&buf_rx,
|
|
||||||
size,
|
|
||||||
0,
|
|
||||||
VM_FLAGS_ANYWHERE,
|
|
||||||
mach_task_self(),
|
|
||||||
buf_rw,
|
|
||||||
false,
|
|
||||||
&cur_prot,
|
|
||||||
&max_prot,
|
|
||||||
VM_INHERIT_NONE);
|
|
||||||
if (ret != KERN_SUCCESS) {
|
|
||||||
/* TODO: Convert "ret" to a human readable error message. */
|
|
||||||
error_setg(errp, "vm_remap for jit splitwx failed");
|
|
||||||
munmap((void *)buf_rw, size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
|
|
||||||
error_setg_errno(errp, errno, "mprotect for jit splitwx");
|
|
||||||
munmap((void *)buf_rx, size);
|
|
||||||
munmap((void *)buf_rw, size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
tcg_splitwx_diff = buf_rx - buf_rw;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_DARWIN */
|
|
||||||
#endif /* CONFIG_TCG_INTERPRETER */
|
|
||||||
|
|
||||||
static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
|
|
||||||
{
|
|
||||||
#ifndef CONFIG_TCG_INTERPRETER
|
|
||||||
# ifdef CONFIG_DARWIN
|
|
||||||
return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
|
|
||||||
# endif
|
|
||||||
# ifdef CONFIG_POSIX
|
|
||||||
return alloc_code_gen_buffer_splitwx_memfd(size, errp);
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
error_setg(errp, "jit split-wx not supported");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
|
|
||||||
{
|
|
||||||
ERRP_GUARD();
|
|
||||||
int prot, flags;
|
|
||||||
|
|
||||||
if (splitwx) {
|
|
||||||
if (alloc_code_gen_buffer_splitwx(size, errp)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* If splitwx force-on (1), fail;
|
|
||||||
* if splitwx default-on (-1), fall through to splitwx off.
|
|
||||||
*/
|
|
||||||
if (splitwx > 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
error_free_or_abort(errp);
|
|
||||||
}
|
|
||||||
|
|
||||||
prot = PROT_READ | PROT_WRITE | PROT_EXEC;
|
|
||||||
flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
|
||||||
#ifdef CONFIG_TCG_INTERPRETER
|
|
||||||
/* The tcg interpreter does not need execute permission. */
|
|
||||||
prot = PROT_READ | PROT_WRITE;
|
|
||||||
#elif defined(CONFIG_DARWIN)
|
|
||||||
/* Applicable to both iOS and macOS (Apple Silicon). */
|
|
||||||
if (!splitwx) {
|
|
||||||
flags |= MAP_JIT;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return alloc_code_gen_buffer_anon(size, prot, flags, errp);
|
|
||||||
}
|
|
||||||
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
|
|
||||||
|
|
||||||
static bool tb_cmp(const void *ap, const void *bp)
|
static bool tb_cmp(const void *ap, const void *bp)
|
||||||
{
|
{
|
||||||
const TranslationBlock *a = ap;
|
const TranslationBlock *a = ap;
|
||||||
|
@ -1316,36 +904,13 @@ static bool tb_cmp(const void *ap, const void *bp)
|
||||||
a->page_addr[1] == b->page_addr[1];
|
a->page_addr[1] == b->page_addr[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tb_htable_init(void)
|
void tb_htable_init(void)
|
||||||
{
|
{
|
||||||
unsigned int mode = QHT_MODE_AUTO_RESIZE;
|
unsigned int mode = QHT_MODE_AUTO_RESIZE;
|
||||||
|
|
||||||
qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
|
qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Must be called before using the QEMU cpus. 'tb_size' is the size
|
|
||||||
(in bytes) allocated to the translation buffer. Zero means default
|
|
||||||
size. */
|
|
||||||
void tcg_exec_init(unsigned long tb_size, int splitwx)
|
|
||||||
{
|
|
||||||
bool ok;
|
|
||||||
|
|
||||||
tcg_allowed = true;
|
|
||||||
cpu_gen_init();
|
|
||||||
page_init();
|
|
||||||
tb_htable_init();
|
|
||||||
|
|
||||||
ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
|
|
||||||
splitwx, &error_fatal);
|
|
||||||
assert(ok);
|
|
||||||
|
|
||||||
#if defined(CONFIG_SOFTMMU)
|
|
||||||
/* There's no guest base to take into account, so go ahead and
|
|
||||||
initialize the prologue now. */
|
|
||||||
tcg_prologue_init(tcg_ctx);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* call with @p->lock held */
|
/* call with @p->lock held */
|
||||||
static inline void invalidate_page_bitmap(PageDesc *p)
|
static inline void invalidate_page_bitmap(PageDesc *p)
|
||||||
{
|
{
|
||||||
|
|
|
@ -813,7 +813,7 @@ int main(int argc, char **argv)
|
||||||
envlist_free(envlist);
|
envlist_free(envlist);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now that page sizes are configured in tcg_exec_init() we can do
|
* Now that page sizes are configured we can do
|
||||||
* proper page alignment for guest_base.
|
* proper page alignment for guest_base.
|
||||||
*/
|
*/
|
||||||
guest_base = HOST_PAGE_ALIGN(guest_base);
|
guest_base = HOST_PAGE_ALIGN(guest_base);
|
||||||
|
@ -879,7 +879,6 @@ int main(int argc, char **argv)
|
||||||
* the real value of GUEST_BASE into account.
|
* the real value of GUEST_BASE into account.
|
||||||
*/
|
*/
|
||||||
tcg_prologue_init(tcg_ctx);
|
tcg_prologue_init(tcg_ctx);
|
||||||
tcg_region_init();
|
|
||||||
|
|
||||||
/* build Task State */
|
/* build Task State */
|
||||||
memset(ts, 0, sizeof(TaskState));
|
memset(ts, 0, sizeof(TaskState));
|
||||||
|
|
|
@ -11,13 +11,14 @@ performances.
|
||||||
QEMU's dynamic translation backend is called TCG, for "Tiny Code
|
QEMU's dynamic translation backend is called TCG, for "Tiny Code
|
||||||
Generator". For more information, please take a look at ``tcg/README``.
|
Generator". For more information, please take a look at ``tcg/README``.
|
||||||
|
|
||||||
Some notable features of QEMU's dynamic translator are:
|
The following sections outline some notable features and implementation
|
||||||
|
details of QEMU's dynamic translator.
|
||||||
|
|
||||||
CPU state optimisations
|
CPU state optimisations
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
The target CPUs have many internal states which change the way it
|
The target CPUs have many internal states which change the way they
|
||||||
evaluates instructions. In order to achieve a good speed, the
|
evaluate instructions. In order to achieve a good speed, the
|
||||||
translation phase considers that some state information of the virtual
|
translation phase considers that some state information of the virtual
|
||||||
CPU cannot change in it. The state is recorded in the Translation
|
CPU cannot change in it. The state is recorded in the Translation
|
||||||
Block (TB). If the state changes (e.g. privilege level), a new TB will
|
Block (TB). If the state changes (e.g. privilege level), a new TB will
|
||||||
|
@ -31,17 +32,95 @@ Direct block chaining
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
After each translated basic block is executed, QEMU uses the simulated
|
After each translated basic block is executed, QEMU uses the simulated
|
||||||
Program Counter (PC) and other cpu state information (such as the CS
|
Program Counter (PC) and other CPU state information (such as the CS
|
||||||
segment base value) to find the next basic block.
|
segment base value) to find the next basic block.
|
||||||
|
|
||||||
In order to accelerate the most common cases where the new simulated PC
|
In its simplest, less optimized form, this is done by exiting from the
|
||||||
is known, QEMU can patch a basic block so that it jumps directly to the
|
current TB, going through the TB epilogue, and then back to the
|
||||||
next one.
|
main loop. That’s where QEMU looks for the next TB to execute,
|
||||||
|
translating it from the guest architecture if it isn’t already available
|
||||||
|
in memory. Then QEMU proceeds to execute this next TB, starting at the
|
||||||
|
prologue and then moving on to the translated instructions.
|
||||||
|
|
||||||
The most portable code uses an indirect jump. An indirect jump makes
|
Exiting from the TB this way will cause the ``cpu_exec_interrupt()``
|
||||||
it easier to make the jump target modification atomic. On some host
|
callback to be re-evaluated before executing additional instructions.
|
||||||
architectures (such as x86 or PowerPC), the ``JUMP`` opcode is
|
It is mandatory to exit this way after any CPU state changes that may
|
||||||
directly patched so that the block chaining has no overhead.
|
unmask interrupts.
|
||||||
|
|
||||||
|
In order to accelerate the cases where the TB for the new
|
||||||
|
simulated PC is already available, QEMU has mechanisms that allow
|
||||||
|
multiple TBs to be chained directly, without having to go back to the
|
||||||
|
main loop as described above. These mechanisms are:
|
||||||
|
|
||||||
|
``lookup_and_goto_ptr``
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Calling ``tcg_gen_lookup_and_goto_ptr()`` will emit a call to
|
||||||
|
``helper_lookup_tb_ptr``. This helper will look for an existing TB that
|
||||||
|
matches the current CPU state. If the destination TB is available, its
|
||||||
|
code address is returned, otherwise the address of the JIT epilogue is
|
||||||
|
returned. The call to the helper is always followed by the TCG ``goto_ptr``
|
||||||
|
opcode, which branches to the returned address. In this way, we either
|
||||||
|
branch to the next TB or return to the main loop.
|
||||||
|
|
||||||
|
``goto_tb + exit_tb``
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The translation code usually implements branching by performing the
|
||||||
|
following steps:
|
||||||
|
|
||||||
|
1. Call ``tcg_gen_goto_tb()`` passing a jump slot index (either 0 or 1)
|
||||||
|
as a parameter.
|
||||||
|
|
||||||
|
2. Emit TCG instructions to update the CPU state with any information
|
||||||
|
that has been assumed constant and is required by the main loop to
|
||||||
|
correctly locate and execute the next TB. For most guests, this is
|
||||||
|
just the PC of the branch destination, but others may store additional
|
||||||
|
data. The information updated in this step must be inferable from both
|
||||||
|
``cpu_get_tb_cpu_state()`` and ``cpu_restore_state()``.
|
||||||
|
|
||||||
|
3. Call ``tcg_gen_exit_tb()`` passing the address of the current TB and
|
||||||
|
the jump slot index again.
|
||||||
|
|
||||||
|
Step 1, ``tcg_gen_goto_tb()``, will emit a ``goto_tb`` TCG
|
||||||
|
instruction that later on gets translated to a jump to an address
|
||||||
|
associated with the specified jump slot. Initially, this is the address
|
||||||
|
of step 2's instructions, which update the CPU state information. Step 3,
|
||||||
|
``tcg_gen_exit_tb()``, exits from the current TB returning a tagged
|
||||||
|
pointer composed of the last executed TB’s address and the jump slot
|
||||||
|
index.
|
||||||
|
|
||||||
|
The first time this whole sequence is executed, step 1 simply jumps
|
||||||
|
to step 2. Then the CPU state information gets updated and we exit from
|
||||||
|
the current TB. As a result, the behavior is very similar to the less
|
||||||
|
optimized form described earlier in this section.
|
||||||
|
|
||||||
|
Next, the main loop looks for the next TB to execute using the
|
||||||
|
current CPU state information (creating the TB if it wasn’t already
|
||||||
|
available) and, before starting to execute the new TB’s instructions,
|
||||||
|
patches the previously executed TB by associating one of its jump
|
||||||
|
slots (the one specified in the call to ``tcg_gen_exit_tb()``) with the
|
||||||
|
address of the new TB.
|
||||||
|
|
||||||
|
The next time this previous TB is executed and we get to that same
|
||||||
|
``goto_tb`` step, it will already be patched (assuming the destination TB
|
||||||
|
is still in memory) and will jump directly to the first instruction of
|
||||||
|
the destination TB, without going back to the main loop.
|
||||||
|
|
||||||
|
For the ``goto_tb + exit_tb`` mechanism to be used, the following
|
||||||
|
conditions need to be satisfied:
|
||||||
|
|
||||||
|
* The change in CPU state must be constant, e.g., a direct branch and
|
||||||
|
not an indirect branch.
|
||||||
|
|
||||||
|
* The direct branch cannot cross a page boundary. Memory mappings
|
||||||
|
may change, causing the code at the destination address to change.
|
||||||
|
|
||||||
|
Note that, on step 3 (``tcg_gen_exit_tb()``), in addition to the
|
||||||
|
jump slot index, the address of the TB just executed is also returned.
|
||||||
|
This address corresponds to the TB that will be patched; it may be
|
||||||
|
different from the one that was directly executed from the main loop
|
||||||
|
if the latter had already been chained to other TBs.
|
||||||
|
|
||||||
Self-modifying code and translated code invalidation
|
Self-modifying code and translated code invalidation
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
specific_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c'))
|
|
@ -4818,7 +4818,7 @@ float32 float32_exp2(float32 a, float_status *status)
|
||||||
|
|
||||||
float_raise(float_flag_inexact, status);
|
float_raise(float_flag_inexact, status);
|
||||||
|
|
||||||
float64_unpack_canonical(&xnp, float64_ln2, status);
|
float64_unpack_canonical(&tp, float64_ln2, status);
|
||||||
xp = *parts_mul(&xp, &tp, status);
|
xp = *parts_mul(&xp, &tp, status);
|
||||||
xnp = xp;
|
xnp = xp;
|
||||||
|
|
||||||
|
|
|
@ -512,6 +512,7 @@ void sigaction_invoke(struct sigaction *action,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int qemu_madvise(void *addr, size_t len, int advice);
|
int qemu_madvise(void *addr, size_t len, int advice);
|
||||||
|
int qemu_mprotect_rw(void *addr, size_t size);
|
||||||
int qemu_mprotect_rwx(void *addr, size_t size);
|
int qemu_mprotect_rwx(void *addr, size_t size);
|
||||||
int qemu_mprotect_none(void *addr, size_t size);
|
int qemu_mprotect_none(void *addr, size_t size);
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,6 @@
|
||||||
#ifndef SYSEMU_TCG_H
|
#ifndef SYSEMU_TCG_H
|
||||||
#define SYSEMU_TCG_H
|
#define SYSEMU_TCG_H
|
||||||
|
|
||||||
void tcg_exec_init(unsigned long tb_size, int splitwx);
|
|
||||||
|
|
||||||
#ifdef CONFIG_TCG
|
#ifdef CONFIG_TCG
|
||||||
extern bool tcg_allowed;
|
extern bool tcg_allowed;
|
||||||
#define tcg_enabled() (tcg_allowed)
|
#define tcg_enabled() (tcg_allowed)
|
||||||
|
|
|
@ -689,22 +689,12 @@ static inline bool temp_readonly(TCGTemp *ts)
|
||||||
return ts->kind >= TEMP_FIXED;
|
return ts->kind >= TEMP_FIXED;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern TCGContext tcg_init_ctx;
|
|
||||||
extern __thread TCGContext *tcg_ctx;
|
extern __thread TCGContext *tcg_ctx;
|
||||||
extern const void *tcg_code_gen_epilogue;
|
extern const void *tcg_code_gen_epilogue;
|
||||||
extern uintptr_t tcg_splitwx_diff;
|
extern uintptr_t tcg_splitwx_diff;
|
||||||
extern TCGv_env cpu_env;
|
extern TCGv_env cpu_env;
|
||||||
|
|
||||||
static inline bool in_code_gen_buffer(const void *p)
|
bool in_code_gen_buffer(const void *p);
|
||||||
{
|
|
||||||
const TCGContext *s = &tcg_init_ctx;
|
|
||||||
/*
|
|
||||||
* Much like it is valid to have a pointer to the byte past the
|
|
||||||
* end of an array (so long as you don't dereference it), allow
|
|
||||||
* a pointer to the byte past the end of the code gen buffer.
|
|
||||||
*/
|
|
||||||
return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_TCG
|
#ifdef CONFIG_DEBUG_TCG
|
||||||
const void *tcg_splitwx_to_rx(void *rw);
|
const void *tcg_splitwx_to_rx(void *rw);
|
||||||
|
@ -873,7 +863,6 @@ void *tcg_malloc_internal(TCGContext *s, int size);
|
||||||
void tcg_pool_reset(TCGContext *s);
|
void tcg_pool_reset(TCGContext *s);
|
||||||
TranslationBlock *tcg_tb_alloc(TCGContext *s);
|
TranslationBlock *tcg_tb_alloc(TCGContext *s);
|
||||||
|
|
||||||
void tcg_region_init(void);
|
|
||||||
void tb_destroy(TranslationBlock *tb);
|
void tb_destroy(TranslationBlock *tb);
|
||||||
void tcg_region_reset_all(void);
|
void tcg_region_reset_all(void);
|
||||||
|
|
||||||
|
@ -906,7 +895,7 @@ static inline void *tcg_malloc(int size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void tcg_context_init(TCGContext *s);
|
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
|
||||||
void tcg_register_thread(void);
|
void tcg_register_thread(void);
|
||||||
void tcg_prologue_init(TCGContext *s);
|
void tcg_prologue_init(TCGContext *s);
|
||||||
void tcg_func_start(TCGContext *s);
|
void tcg_func_start(TCGContext *s);
|
||||||
|
@ -1082,6 +1071,16 @@ void tcg_op_remove(TCGContext *s, TCGOp *op);
|
||||||
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc);
|
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc);
|
||||||
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
|
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* tcg_remove_ops_after:
|
||||||
|
* @op: target operation
|
||||||
|
*
|
||||||
|
* Discard any opcodes emitted since @op. Expected usage is to save
|
||||||
|
* a starting point with tcg_last_op(), speculatively emit opcodes,
|
||||||
|
* then decide whether or not to keep those opcodes after the fact.
|
||||||
|
*/
|
||||||
|
void tcg_remove_ops_after(TCGOp *op);
|
||||||
|
|
||||||
void tcg_optimize(TCGContext *s);
|
void tcg_optimize(TCGContext *s);
|
||||||
|
|
||||||
/* Allocate a new temporary and initialize it with a constant. */
|
/* Allocate a new temporary and initialize it with a constant. */
|
||||||
|
@ -1096,7 +1095,8 @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Locate or create a read-only temporary that is a constant.
|
* Locate or create a read-only temporary that is a constant.
|
||||||
* This kind of temporary need not and should not be freed.
|
* This kind of temporary need not be freed, but for convenience
|
||||||
|
* will be silently ignored by tcg_temp_free_*.
|
||||||
*/
|
*/
|
||||||
TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
|
TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
|
||||||
|
|
||||||
|
|
|
@ -868,7 +868,6 @@ int main(int argc, char **argv, char **envp)
|
||||||
generating the prologue until now so that the prologue can take
|
generating the prologue until now so that the prologue can take
|
||||||
the real value of GUEST_BASE into account. */
|
the real value of GUEST_BASE into account. */
|
||||||
tcg_prologue_init(tcg_ctx);
|
tcg_prologue_init(tcg_ctx);
|
||||||
tcg_region_init();
|
|
||||||
|
|
||||||
target_cpu_copy_regs(env, regs);
|
target_cpu_copy_regs(env, regs);
|
||||||
|
|
||||||
|
|
12
meson.build
12
meson.build
|
@ -1968,16 +1968,6 @@ subdir('softmmu')
|
||||||
|
|
||||||
common_ss.add(capstone)
|
common_ss.add(capstone)
|
||||||
specific_ss.add(files('cpu.c', 'disas.c', 'gdbstub.c'), capstone)
|
specific_ss.add(files('cpu.c', 'disas.c', 'gdbstub.c'), capstone)
|
||||||
specific_ss.add(when: 'CONFIG_TCG', if_true: files(
|
|
||||||
'fpu/softfloat.c',
|
|
||||||
'tcg/optimize.c',
|
|
||||||
'tcg/tcg-common.c',
|
|
||||||
'tcg/tcg-op-gvec.c',
|
|
||||||
'tcg/tcg-op-vec.c',
|
|
||||||
'tcg/tcg-op.c',
|
|
||||||
'tcg/tcg.c',
|
|
||||||
))
|
|
||||||
specific_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tcg/tci.c'))
|
|
||||||
|
|
||||||
# Work around a gcc bug/misfeature wherein constant propagation looks
|
# Work around a gcc bug/misfeature wherein constant propagation looks
|
||||||
# through an alias:
|
# through an alias:
|
||||||
|
@ -2007,6 +1997,8 @@ subdir('net')
|
||||||
subdir('replay')
|
subdir('replay')
|
||||||
subdir('semihosting')
|
subdir('semihosting')
|
||||||
subdir('hw')
|
subdir('hw')
|
||||||
|
subdir('tcg')
|
||||||
|
subdir('fpu')
|
||||||
subdir('accel')
|
subdir('accel')
|
||||||
subdir('plugins')
|
subdir('plugins')
|
||||||
subdir('bsd-user')
|
subdir('bsd-user')
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
|
|
||||||
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||||
#undef TCG_TARGET_STACK_GROWSUP
|
#undef TCG_TARGET_STACK_GROWSUP
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
|
|
@ -1984,7 +1984,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
|
||||||
static void tcg_out_epilogue(TCGContext *s);
|
static void tcg_out_epilogue(TCGContext *s);
|
||||||
|
|
||||||
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||||
const TCGArg *args, const int *const_args)
|
const TCGArg args[TCG_MAX_OP_ARGS],
|
||||||
|
const int const_args[TCG_MAX_OP_ARGS])
|
||||||
{
|
{
|
||||||
TCGArg a0, a1, a2, a3, a4, a5;
|
TCGArg a0, a1, a2, a3, a4, a5;
|
||||||
int c;
|
int c;
|
||||||
|
|
|
@ -60,6 +60,7 @@ extern int arm_arch;
|
||||||
#undef TCG_TARGET_STACK_GROWSUP
|
#undef TCG_TARGET_STACK_GROWSUP
|
||||||
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TCG_REG_R0 = 0,
|
TCG_REG_R0 = 0,
|
||||||
|
|
|
@ -31,9 +31,11 @@
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
# define TCG_TARGET_REG_BITS 64
|
# define TCG_TARGET_REG_BITS 64
|
||||||
# define TCG_TARGET_NB_REGS 32
|
# define TCG_TARGET_NB_REGS 32
|
||||||
|
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||||
#else
|
#else
|
||||||
# define TCG_TARGET_REG_BITS 32
|
# define TCG_TARGET_REG_BITS 32
|
||||||
# define TCG_TARGET_NB_REGS 24
|
# define TCG_TARGET_NB_REGS 24
|
||||||
|
# define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
tcg_ss = ss.source_set()
|
||||||
|
|
||||||
|
tcg_ss.add(files(
|
||||||
|
'optimize.c',
|
||||||
|
'region.c',
|
||||||
|
'tcg.c',
|
||||||
|
'tcg-common.c',
|
||||||
|
'tcg-op.c',
|
||||||
|
'tcg-op-gvec.c',
|
||||||
|
'tcg-op-vec.c',
|
||||||
|
))
|
||||||
|
tcg_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tci.c'))
|
||||||
|
|
||||||
|
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
|
|
@ -39,6 +39,12 @@
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
|
||||||
#define TCG_TARGET_NB_REGS 32
|
#define TCG_TARGET_NB_REGS 32
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We have a 256MB branch region, but leave room to make sure the
|
||||||
|
* main executable is also within that region.
|
||||||
|
*/
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB)
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TCG_REG_ZERO = 0,
|
TCG_REG_ZERO = 0,
|
||||||
TCG_REG_AT,
|
TCG_REG_AT,
|
||||||
|
|
|
@ -27,8 +27,10 @@
|
||||||
|
|
||||||
#ifdef _ARCH_PPC64
|
#ifdef _ARCH_PPC64
|
||||||
# define TCG_TARGET_REG_BITS 64
|
# define TCG_TARGET_REG_BITS 64
|
||||||
|
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||||
#else
|
#else
|
||||||
# define TCG_TARGET_REG_BITS 32
|
# define TCG_TARGET_REG_BITS 32
|
||||||
|
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TCG_TARGET_NB_REGS 64
|
#define TCG_TARGET_NB_REGS 64
|
||||||
|
|
|
@ -0,0 +1,999 @@
|
||||||
|
/*
|
||||||
|
* Memory region management for Tiny Code Generator for QEMU
|
||||||
|
*
|
||||||
|
* Copyright (c) 2008 Fabrice Bellard
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
#include "qemu/units.h"
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "exec/exec-all.h"
|
||||||
|
#include "tcg/tcg.h"
|
||||||
|
#include "tcg-internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct tcg_region_tree {
|
||||||
|
QemuMutex lock;
|
||||||
|
GTree *tree;
|
||||||
|
/* padding to avoid false sharing is computed at run-time */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We divide code_gen_buffer into equally-sized "regions" that TCG threads
|
||||||
|
* dynamically allocate from as demand dictates. Given appropriate region
|
||||||
|
* sizing, this minimizes flushes even when some TCG threads generate a lot
|
||||||
|
* more code than others.
|
||||||
|
*/
|
||||||
|
struct tcg_region_state {
|
||||||
|
QemuMutex lock;
|
||||||
|
|
||||||
|
/* fields set at init time */
|
||||||
|
void *start_aligned;
|
||||||
|
void *after_prologue;
|
||||||
|
size_t n;
|
||||||
|
size_t size; /* size of one region */
|
||||||
|
size_t stride; /* .size + guard size */
|
||||||
|
size_t total_size; /* size of entire buffer, >= n * stride */
|
||||||
|
|
||||||
|
/* fields protected by the lock */
|
||||||
|
size_t current; /* current region index */
|
||||||
|
size_t agg_size_full; /* aggregate size of full regions */
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct tcg_region_state region;

/*
 * Array of struct tcg_region_tree, with padding between elements.
 * It is kept as a void * so that element i can be located with plain
 * byte arithmetic: region_trees + i * tree_size.
 */
static void *region_trees;
static size_t tree_size;
|
||||||
|
|
||||||
|
bool in_code_gen_buffer(const void *p)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Much like it is valid to have a pointer to the byte past the
|
||||||
|
* end of an array (so long as you don't dereference it), allow
|
||||||
|
* a pointer to the byte past the end of the code gen buffer.
|
||||||
|
*/
|
||||||
|
return (size_t)(p - region.start_aligned) <= region.total_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_DEBUG_TCG
|
||||||
|
const void *tcg_splitwx_to_rx(void *rw)
|
||||||
|
{
|
||||||
|
/* Pass NULL pointers unchanged. */
|
||||||
|
if (rw) {
|
||||||
|
g_assert(in_code_gen_buffer(rw));
|
||||||
|
rw += tcg_splitwx_diff;
|
||||||
|
}
|
||||||
|
return rw;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *tcg_splitwx_to_rw(const void *rx)
|
||||||
|
{
|
||||||
|
/* Pass NULL pointers unchanged. */
|
||||||
|
if (rx) {
|
||||||
|
rx -= tcg_splitwx_diff;
|
||||||
|
/* Assert that we end with a pointer in the rw region. */
|
||||||
|
g_assert(in_code_gen_buffer(rx));
|
||||||
|
}
|
||||||
|
return (void *)rx;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_DEBUG_TCG */
|
||||||
|
|
||||||
|
/* compare a pointer @ptr and a tb_tc @s */
|
||||||
|
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
|
||||||
|
{
|
||||||
|
if (ptr >= s->ptr + s->size) {
|
||||||
|
return 1;
|
||||||
|
} else if (ptr < s->ptr) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * GTree comparison callback for struct tb_tc keys.
 *
 * Two kinds of comparison are handled:
 *  - both keys have a non-zero size: order by start pointer;
 *  - one key has size 0: that key is a lookup key, so order its pointer
 *    against the range described by the other key.
 */
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *lhs = ap;
    const struct tb_tc *rhs = bp;

    /*
     * With both sizes set this cannot be a lookup.  It is also the most
     * likely case: every TB has to be inserted, whereas lookups are far
     * less frequent.
     */
    if (likely(lhs->size && rhs->size)) {
        if (lhs->ptr != rhs->ptr) {
            return lhs->ptr > rhs->ptr ? 1 : -1;
        }
        /* Equal start pointers should only be seen on deletions. */
        g_assert(lhs->size == rhs->size);
        return 0;
    }

    /*
     * A lookup has one of the two .size fields set to 0.  The glib
     * sources show that @ap is always the lookup key, but the docs give
     * no such guarantee, so that case is merely marked as likely.
     */
    return likely(lhs->size == 0) ? ptr_cmp_tb_tc(lhs->ptr, rhs)
                                  : ptr_cmp_tb_tc(rhs->ptr, lhs);
}
|
||||||
|
|
||||||
|
static void tcg_region_trees_init(void)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
|
||||||
|
region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
|
||||||
|
for (i = 0; i < region.n; i++) {
|
||||||
|
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||||
|
|
||||||
|
qemu_mutex_init(&rt->lock);
|
||||||
|
rt->tree = g_tree_new(tb_tc_cmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
|
||||||
|
{
|
||||||
|
size_t region_idx;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like tcg_splitwx_to_rw, with no assert. The pc may come from
|
||||||
|
* a signal handler over which the caller has no control.
|
||||||
|
*/
|
||||||
|
if (!in_code_gen_buffer(p)) {
|
||||||
|
p -= tcg_splitwx_diff;
|
||||||
|
if (!in_code_gen_buffer(p)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p < region.start_aligned) {
|
||||||
|
region_idx = 0;
|
||||||
|
} else {
|
||||||
|
ptrdiff_t offset = p - region.start_aligned;
|
||||||
|
|
||||||
|
if (offset > region.stride * (region.n - 1)) {
|
||||||
|
region_idx = region.n - 1;
|
||||||
|
} else {
|
||||||
|
region_idx = offset / region.stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return region_trees + region_idx * tree_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Add @tb to the tree of the region that contains tb->tc.ptr, keyed by
 * &tb->tc.  The pointer must resolve to a region.
 */
void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *owner = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(owner != NULL);

    qemu_mutex_lock(&owner->lock);
    g_tree_insert(owner->tree, &tb->tc, tb);
    qemu_mutex_unlock(&owner->lock);
}
|
||||||
|
|
||||||
|
/*
 * Drop the entry keyed by &tb->tc from the tree of the region that
 * contains tb->tc.ptr.  The pointer must resolve to a region.
 */
void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *owner = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(owner != NULL);

    qemu_mutex_lock(&owner->lock);
    g_tree_remove(owner->tree, &tb->tc);
    qemu_mutex_unlock(&owner->lock);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the TB 'tb' such that
|
||||||
|
* tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
|
||||||
|
* Return NULL if not found.
|
||||||
|
*/
|
||||||
|
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
|
||||||
|
{
|
||||||
|
struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
|
||||||
|
TranslationBlock *tb;
|
||||||
|
struct tb_tc s = { .ptr = (void *)tc_ptr };
|
||||||
|
|
||||||
|
if (rt == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
qemu_mutex_lock(&rt->lock);
|
||||||
|
tb = g_tree_lookup(rt->tree, &s);
|
||||||
|
qemu_mutex_unlock(&rt->lock);
|
||||||
|
return tb;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void tcg_region_tree_lock_all(void)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < region.n; i++) {
|
||||||
|
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||||
|
|
||||||
|
qemu_mutex_lock(&rt->lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void tcg_region_tree_unlock_all(void)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < region.n; i++) {
|
||||||
|
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||||
|
|
||||||
|
qemu_mutex_unlock(&rt->lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Run g_tree_foreach() with @func and @user_data over every region tree,
 * holding all of the tree locks for the duration of the walk.
 */
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    void *slot;
    size_t idx;

    tcg_region_tree_lock_all();
    slot = region_trees;
    for (idx = 0; idx < region.n; idx++, slot += tree_size) {
        struct tcg_region_tree *rt = slot;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}
|
||||||
|
|
||||||
|
size_t tcg_nb_tbs(void)
|
||||||
|
{
|
||||||
|
size_t nb_tbs = 0;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
tcg_region_tree_lock_all();
|
||||||
|
for (i = 0; i < region.n; i++) {
|
||||||
|
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||||
|
|
||||||
|
nb_tbs += g_tree_nnodes(rt->tree);
|
||||||
|
}
|
||||||
|
tcg_region_tree_unlock_all();
|
||||||
|
return nb_tbs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * g_tree_foreach() callback: pass the TB stored as the node's value to
 * tb_destroy().  Always returns FALSE; the key and @data are unused.
 */
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *victim = v;

    tb_destroy(victim);
    return FALSE;
}
|
||||||
|
|
||||||
|
static void tcg_region_tree_reset_all(void)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
tcg_region_tree_lock_all();
|
||||||
|
for (i = 0; i < region.n; i++) {
|
||||||
|
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||||
|
|
||||||
|
g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
|
||||||
|
/* Increment the refcount first so that destroy acts as a reset */
|
||||||
|
g_tree_ref(rt->tree);
|
||||||
|
g_tree_destroy(rt->tree);
|
||||||
|
}
|
||||||
|
tcg_region_tree_unlock_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
|
||||||
|
{
|
||||||
|
void *start, *end;
|
||||||
|
|
||||||
|
start = region.start_aligned + curr_region * region.stride;
|
||||||
|
end = start + region.size;
|
||||||
|
|
||||||
|
if (curr_region == 0) {
|
||||||
|
start = region.after_prologue;
|
||||||
|
}
|
||||||
|
/* The final region may have a few extra pages due to earlier rounding. */
|
||||||
|
if (curr_region == region.n - 1) {
|
||||||
|
end = region.start_aligned + region.total_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
*pstart = start;
|
||||||
|
*pend = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Point the code generation fields of @s at region @curr_region: buffer
 * start, current pointer, buffer size, and a high-water mark placed
 * TCG_HIGHWATER bytes before the end of the region.
 */
static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *lo, *hi;

    tcg_region_bounds(curr_region, &lo, &hi);

    s->code_gen_buffer = lo;
    s->code_gen_ptr = lo;
    s->code_gen_buffer_size = hi - lo;
    s->code_gen_highwater = hi - TCG_HIGHWATER;
}
|
||||||
|
|
||||||
|
/*
 * Assign the region at index region.current to @s and advance the index.
 * Returns true (error) when every region has already been handed out,
 * false on success.
 */
static bool tcg_region_alloc__locked(TCGContext *s)
{
    bool exhausted = region.current == region.n;

    if (!exhausted) {
        tcg_region_assign(s, region.current);
        region.current++;
    }
    return exhausted;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Request a new region once the one in use has filled up.
|
||||||
|
* Returns true on error.
|
||||||
|
*/
|
||||||
|
bool tcg_region_alloc(TCGContext *s)
|
||||||
|
{
|
||||||
|
bool err;
|
||||||
|
/* read the region size now; alloc__locked will overwrite it on success */
|
||||||
|
size_t size_full = s->code_gen_buffer_size;
|
||||||
|
|
||||||
|
qemu_mutex_lock(®ion.lock);
|
||||||
|
err = tcg_region_alloc__locked(s);
|
||||||
|
if (!err) {
|
||||||
|
region.agg_size_full += size_full - TCG_HIGHWATER;
|
||||||
|
}
|
||||||
|
qemu_mutex_unlock(®ion.lock);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform a context's first region allocation.
|
||||||
|
* This function does _not_ increment region.agg_size_full.
|
||||||
|
*/
|
||||||
|
static void tcg_region_initial_alloc__locked(TCGContext *s)
|
||||||
|
{
|
||||||
|
bool err = tcg_region_alloc__locked(s);
|
||||||
|
g_assert(!err);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Perform the first region allocation for context @s while holding
 * region.lock.
 */
void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}
|
||||||
|
|
||||||
|
/* Call from a safe-work context */
|
||||||
|
void tcg_region_reset_all(void)
|
||||||
|
{
|
||||||
|
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
qemu_mutex_lock(®ion.lock);
|
||||||
|
region.current = 0;
|
||||||
|
region.agg_size_full = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < n_ctxs; i++) {
|
||||||
|
TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||||
|
tcg_region_initial_alloc__locked(s);
|
||||||
|
}
|
||||||
|
qemu_mutex_unlock(®ion.lock);
|
||||||
|
|
||||||
|
tcg_region_tree_reset_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_USER_ONLY
|
||||||
|
return 1;
|
||||||
|
#else
|
||||||
|
size_t n_regions;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It is likely that some vCPUs will translate more code than others,
|
||||||
|
* so we first try to set more regions than max_cpus, with those regions
|
||||||
|
* being of reasonable size. If that's not possible we make do by evenly
|
||||||
|
* dividing the code_gen_buffer among the vCPUs.
|
||||||
|
*/
|
||||||
|
/* Use a single region if all we have is one vCPU thread */
|
||||||
|
if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to have more regions than max_cpus, with each region being >= 2 MB.
|
||||||
|
* If we can't, then just allocate one region per vCPU thread.
|
||||||
|
*/
|
||||||
|
n_regions = tb_size / (2 * MiB);
|
||||||
|
if (n_regions <= max_cpus) {
|
||||||
|
return max_cpus;
|
||||||
|
}
|
||||||
|
return MIN(n_regions, max_cpus * 8);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minimum size of the code gen buffer. This number is randomly chosen,
|
||||||
|
* but not so small that we can't have a fair number of TB's live.
|
||||||
|
*
|
||||||
|
* Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
|
||||||
|
* Unless otherwise indicated, this is constrained by the range of
|
||||||
|
* direct branches on the host cpu, as used by the TCG implementation
|
||||||
|
* of goto_tb.
|
||||||
|
*/
|
||||||
|
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
|
||||||
|
|
||||||
|
#if TCG_TARGET_REG_BITS == 32
|
||||||
|
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
|
||||||
|
#ifdef CONFIG_USER_ONLY
|
||||||
|
/*
|
||||||
|
* For user mode on smaller 32 bit systems we may run into trouble
|
||||||
|
* allocating big chunks of data in the right place. On these systems
|
||||||
|
* we utilise a static code generation buffer directly in the binary.
|
||||||
|
*/
|
||||||
|
#define USE_STATIC_CODE_GEN_BUFFER
|
||||||
|
#endif
|
||||||
|
#else /* TCG_TARGET_REG_BITS == 64 */
|
||||||
|
#ifdef CONFIG_USER_ONLY
|
||||||
|
/*
|
||||||
|
* As user-mode emulation typically means running multiple instances
|
||||||
|
* of the translator don't go too nuts with our default code gen
|
||||||
|
* buffer lest we make things too hard for the OS.
|
||||||
|
*/
|
||||||
|
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* We expect most system emulation to run one or two guests per host.
|
||||||
|
* Users running large scale system emulation may want to tweak their
|
||||||
|
* runtime setup via the tb-size control on the command line.
|
||||||
|
*/
|
||||||
|
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
|
||||||
|
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
|
||||||
|
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
|
||||||
|
|
||||||
|
#ifdef __mips__
|
||||||
|
/*
 * J and JAL can only be used within the code_gen_buffer if the buffer
 * does not cross a 256MB boundary.
 *
 * Returns true when @addr and @addr + @size differ in any address bit
 * above the low 28, i.e. when they lie in different 256MB-aligned windows.
 */
static inline bool cross_256mb(void *addr, size_t size)
{
    uintptr_t first = (uintptr_t)addr;
    uintptr_t last = first + size;

    return ((first ^ last) & ~0x0ffffffful) != 0;
}
|
||||||
|
|
||||||
|
/*
 * Fallback for a buffer that could not be allocated clear of a 256MB
 * boundary: keep whichever side of the boundary is larger (the lower
 * side on a tie).
 *
 * @buf1/@size1 describe the crossing buffer; the base and size of the
 * retained part are returned in *@obuf and *@osize.
 */
static inline void split_cross_256mb(void **obuf, size_t *osize,
                                     void *buf1, size_t size1)
{
    uintptr_t lo = (uintptr_t)buf1;
    uintptr_t hi = lo + size1;
    /* Highest 256MB-aligned address not above the end of the buffer. */
    uintptr_t boundary = hi & ~0x0ffffffful;
    size_t below = boundary - lo;
    size_t above = hi - boundary;

    if (below < above) {
        *obuf = (void *)boundary;
        *osize = above;
    } else {
        *obuf = buf1;
        *osize = below;
    }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_STATIC_CODE_GEN_BUFFER
|
||||||
|
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
|
||||||
|
__attribute__((aligned(CODE_GEN_ALIGN)));
|
||||||
|
|
||||||
|
/*
 * Set up the code gen buffer from the static in-binary array.
 *
 * Records the chosen range in region.start_aligned / region.total_size
 * and returns PROT_READ | PROT_WRITE.  A positive @splitwx is rejected:
 * @errp is set and -1 is returned.
 */
static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *lo = static_code_gen_buffer;
    void *hi = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    size_t len;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* Shrink the array inward so that both ends are page-aligned. */
    lo = QEMU_ALIGN_PTR_UP(lo, qemu_real_host_page_size);
    hi = QEMU_ALIGN_PTR_DOWN(hi, qemu_real_host_page_size);
    len = hi - lo;

    /* Honor a command-line option limiting the size of the buffer. */
    if (len > tb_size) {
        len = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
    }

#ifdef __mips__
    if (cross_256mb(lo, len)) {
        split_cross_256mb(&lo, &len, lo, len);
    }
#endif

    region.start_aligned = lo;
    region.total_size = len;

    return PROT_READ | PROT_WRITE;
}
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
/*
 * Allocate the code gen buffer with VirtualAlloc() as committed
 * PAGE_EXECUTE_READWRITE memory.
 *
 * On success the mapping is recorded in region.start_aligned /
 * region.total_size and PAGE_READ | PAGE_WRITE | PAGE_EXEC is returned.
 * On failure @errp is set and -1 is returned; this covers both a positive
 * @splitwx (not supported here) and a failed allocation.
 */
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        /*
         * Must be negative: 0 ("false") would read as a valid, empty
         * protection value rather than as an error.
         */
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
|
||||||
|
#else
|
||||||
|
/*
 * mmap() @size bytes with the given @prot and @flags (fd -1, offset 0)
 * for use as the code gen buffer.
 *
 * On success the mapping is recorded in region.start_aligned /
 * region.total_size and @prot is returned.  On mmap failure @errp is set
 * from errno and -1 is returned.  On MIPS the recorded range may be
 * smaller than requested (see below).
 */
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf = mmap(NULL, size, prot, flags, -1, 0);

    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

#ifdef __mips__
    if (cross_256mb(buf, size)) {
        /*
         * Map a second time while the first mapping is still in place,
         * so that the same 256mb crossing is not handed back again.
         */
        void *retry = mmap(NULL, size, prot, flags, -1, 0);

        if (retry != MAP_FAILED && !cross_256mb(retry, size)) {
            /* Success!  Use the new buffer. */
            munmap(buf, size);
            buf = retry;
        } else {
            void *kept;
            size_t kept_size;

            /* Failure.  Work with what we had. */
            if (retry != MAP_FAILED) {
                munmap(retry, size);
            }

            /* Split the original buffer.  Free the smaller half. */
            split_cross_256mb(&kept, &kept_size, buf, size);
            if (kept == buf) {
                munmap(buf + kept_size, size - kept_size);
            } else {
                munmap(buf, size - kept_size);
            }
            buf = kept;
            size = kept_size;
        }
    }
#endif

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}
|
||||||
|
|
||||||
|
#ifndef CONFIG_TCG_INTERPRETER
|
||||||
|
#ifdef CONFIG_POSIX
|
||||||
|
#include "qemu/memfd.h"
|
||||||
|
|
||||||
|
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
|
||||||
|
{
|
||||||
|
void *buf_rw = NULL, *buf_rx = MAP_FAILED;
|
||||||
|
int fd = -1;
|
||||||
|
|
||||||
|
#ifdef __mips__
|
||||||
|
/* Find space for the RX mapping, vs the 256MiB regions. */
|
||||||
|
if (alloc_code_gen_buffer_anon(size, PROT_NONE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS |
|
||||||
|
MAP_NORESERVE, errp) < 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
/* The size of the mapping may have been adjusted. */
|
||||||
|
buf_rx = region.start_aligned;
|
||||||
|
size = region.total_size;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
|
||||||
|
if (buf_rw == NULL) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __mips__
|
||||||
|
void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
|
||||||
|
MAP_SHARED | MAP_FIXED, fd, 0);
|
||||||
|
if (tmp != buf_rx) {
|
||||||
|
goto fail_rx;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
|
||||||
|
if (buf_rx == MAP_FAILED) {
|
||||||
|
goto fail_rx;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
region.start_aligned = buf_rw;
|
||||||
|
region.total_size = size;
|
||||||
|
tcg_splitwx_diff = buf_rx - buf_rw;
|
||||||
|
|
||||||
|
return PROT_READ | PROT_WRITE;
|
||||||
|
|
||||||
|
fail_rx:
|
||||||
|
error_setg_errno(errp, errno, "failed to map shared memory for execute");
|
||||||
|
fail:
|
||||||
|
if (buf_rx != MAP_FAILED) {
|
||||||
|
munmap(buf_rx, size);
|
||||||
|
}
|
||||||
|
if (buf_rw) {
|
||||||
|
munmap(buf_rw, size);
|
||||||
|
}
|
||||||
|
if (fd >= 0) {
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_POSIX */
|
||||||
|
|
||||||
|
#ifdef CONFIG_DARWIN
|
||||||
|
#include <mach/mach.h>
|
||||||
|
|
||||||
|
extern kern_return_t mach_vm_remap(vm_map_t target_task,
|
||||||
|
mach_vm_address_t *target_address,
|
||||||
|
mach_vm_size_t size,
|
||||||
|
mach_vm_offset_t mask,
|
||||||
|
int flags,
|
||||||
|
vm_map_t src_task,
|
||||||
|
mach_vm_address_t src_address,
|
||||||
|
boolean_t copy,
|
||||||
|
vm_prot_t *cur_protection,
|
||||||
|
vm_prot_t *max_protection,
|
||||||
|
vm_inherit_t inheritance);
|
||||||
|
|
||||||
|
/*
 * Allocate a split-wx code buffer on Darwin: an anonymous read-write mapping
 * plus a second view of the same memory created with mach_vm_remap() and
 * then switched to read-execute.
 *
 * On success, tcg_splitwx_diff holds the distance from the RW to the RX view
 * and PROT_READ | PROT_WRITE is returned.
 * On failure, @errp is set and -1 is returned.
 */
static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /*
     * Map the read-write portion via normal anon memory.
     * The helper returns -1 on failure and the (non-zero) protection on
     * success, so failure must be detected with "< 0" rather than "!".
     */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
|
||||||
|
#endif /* CONFIG_DARWIN */
|
||||||
|
#endif /* CONFIG_TCG_INTERPRETER */
|
||||||
|
|
||||||
|
/*
 * Dispatch to the split-wx allocator available in this build.
 * Darwin takes precedence over generic POSIX; builds using the TCG
 * interpreter, or with neither host API, report that split-wx is
 * unsupported by setting @errp and returning -1.
 */
static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#if !defined(CONFIG_TCG_INTERPRETER) && defined(CONFIG_DARWIN)
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
#elif !defined(CONFIG_TCG_INTERPRETER) && defined(CONFIG_POSIX)
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
#else
    error_setg(errp, "jit split-wx not supported");
    return -1;
#endif
}
|
||||||
|
|
||||||
|
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;

    /*
     * @splitwx: 0 = off, > 0 = forced on, < 0 = on by default.
     * Returns the protection of the resulting mapping, or -1 with @errp set.
     */
    if (splitwx != 0) {
        int split_prot = alloc_code_gen_buffer_splitwx(size, errp);

        if (split_prot >= 0) {
            return split_prot;
        }
        /* A forced split-wx request that cannot be honoured is an error. */
        if (splitwx > 0) {
            return -1;
        }
        /* Default-on only: discard the error and fall back to one mapping. */
        error_free_or_abort(errp);
    }

#ifdef CONFIG_DARWIN
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (splitwx == 0) {
        flags |= MAP_JIT;
    }
#endif

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later.  Going from NONE to the wanted protection costs
     * the same number of syscalls, so the mapping always starts out as
     * PROT_NONE.
     */
    return alloc_code_gen_buffer_anon(size, PROT_NONE, flags, errp);
}
|
||||||
|
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initializes region partitioning.
|
||||||
|
*
|
||||||
|
* Called at init time from the parent thread (i.e. the one calling
|
||||||
|
* tcg_context_init), after the target's TCG globals have been set.
|
||||||
|
*
|
||||||
|
* Region partitioning works by splitting code_gen_buffer into separate regions,
|
||||||
|
* and then assigning regions to TCG threads so that the threads can translate
|
||||||
|
* code in parallel without synchronization.
|
||||||
|
*
|
||||||
|
* In softmmu the number of TCG threads is bounded by max_cpus, so we use at
|
||||||
|
* least max_cpus regions in MTTCG. In !MTTCG we use a single region.
|
||||||
|
* Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
|
||||||
|
* must have been parsed before calling this function, since it calls
|
||||||
|
* qemu_tcg_mttcg_enabled().
|
||||||
|
*
|
||||||
|
* In user-mode we use a single region. Having multiple regions in user-mode
|
||||||
|
* is not supported, because the number of vCPU threads (recall that each thread
|
||||||
|
* spawned by the guest corresponds to a vCPU thread) is only bounded by the
|
||||||
|
* OS, and usually this number is huge (tens of thousands is not uncommon).
|
||||||
|
* Thus, given this large bound on the number of vCPU threads and the fact
|
||||||
|
* that code_gen_buffer is allocated at compile-time, we cannot guarantee
|
||||||
|
* the availability of at least one region per vCPU thread.
|
||||||
|
*
|
||||||
|
* However, this user-mode limitation is unlikely to be a significant problem
|
||||||
|
* in practice. Multi-threaded guests share most if not all of their translated
|
||||||
|
* code, which makes parallel code generation less appealing than in softmmu.
|
||||||
|
*/
|
||||||
|
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
|
||||||
|
{
|
||||||
|
const size_t page_size = qemu_real_host_page_size;
|
||||||
|
size_t region_size;
|
||||||
|
int have_prot, need_prot;
|
||||||
|
|
||||||
|
/* Size the buffer. */
|
||||||
|
if (tb_size == 0) {
|
||||||
|
size_t phys_mem = qemu_get_host_physmem();
|
||||||
|
if (phys_mem == 0) {
|
||||||
|
tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
|
||||||
|
} else {
|
||||||
|
tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
|
||||||
|
tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
|
||||||
|
tb_size = MIN_CODE_GEN_BUFFER_SIZE;
|
||||||
|
}
|
||||||
|
if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
|
||||||
|
tb_size = MAX_CODE_GEN_BUFFER_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
|
||||||
|
assert(have_prot >= 0);
|
||||||
|
|
||||||
|
/* Request large pages for the buffer and the splitwx. */
|
||||||
|
qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
|
||||||
|
if (tcg_splitwx_diff) {
|
||||||
|
qemu_madvise(region.start_aligned + tcg_splitwx_diff,
|
||||||
|
region.total_size, QEMU_MADV_HUGEPAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make region_size a multiple of page_size, using aligned as the start.
|
||||||
|
* As a result of this we might end up with a few extra pages at the end of
|
||||||
|
* the buffer; we will assign those to the last region.
|
||||||
|
*/
|
||||||
|
region.n = tcg_n_regions(tb_size, max_cpus);
|
||||||
|
region_size = tb_size / region.n;
|
||||||
|
region_size = QEMU_ALIGN_DOWN(region_size, page_size);
|
||||||
|
|
||||||
|
/* A region must have at least 2 pages; one code, one guard */
|
||||||
|
g_assert(region_size >= 2 * page_size);
|
||||||
|
region.stride = region_size;
|
||||||
|
|
||||||
|
/* Reserve space for guard pages. */
|
||||||
|
region.size = region_size - page_size;
|
||||||
|
region.total_size -= page_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The first region will be smaller than the others, via the prologue,
|
||||||
|
* which has yet to be allocated. For now, the first region begins at
|
||||||
|
* the page boundary.
|
||||||
|
*/
|
||||||
|
region.after_prologue = region.start_aligned;
|
||||||
|
|
||||||
|
/* init the region struct */
|
||||||
|
qemu_mutex_init(®ion.lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set guard pages in the rw buffer, as that's the one into which
|
||||||
|
* buffer overruns could occur. Do not set guard pages in the rx
|
||||||
|
* buffer -- let that one use hugepages throughout.
|
||||||
|
* Work with the page protections set up with the initial mapping.
|
||||||
|
*/
|
||||||
|
need_prot = PAGE_READ | PAGE_WRITE;
|
||||||
|
#ifndef CONFIG_TCG_INTERPRETER
|
||||||
|
if (tcg_splitwx_diff == 0) {
|
||||||
|
need_prot |= PAGE_EXEC;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (size_t i = 0, n = region.n; i < n; i++) {
|
||||||
|
void *start, *end;
|
||||||
|
|
||||||
|
tcg_region_bounds(i, &start, &end);
|
||||||
|
if (have_prot != need_prot) {
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
|
||||||
|
rc = qemu_mprotect_rwx(start, end - start);
|
||||||
|
} else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
|
||||||
|
rc = qemu_mprotect_rw(start, end - start);
|
||||||
|
} else {
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
if (rc) {
|
||||||
|
error_setg_errno(&error_fatal, errno,
|
||||||
|
"mprotect of jit buffer");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (have_prot != 0) {
|
||||||
|
/* Guard pages are nice for bug detection but are not essential. */
|
||||||
|
(void)qemu_mprotect_none(end, page_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tcg_region_trees_init();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Leave the initial context initialized to the first region.
|
||||||
|
* This will be the context into which we generate the prologue.
|
||||||
|
* It is also the only context for CONFIG_USER_ONLY.
|
||||||
|
*/
|
||||||
|
tcg_region_initial_alloc__locked(&tcg_init_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tcg_region_prologue_set(TCGContext *s)
{
    const void *jit_start;

    /*
     * The prologue was emitted at the very start of the buffer; everything
     * up to the context's current code pointer now belongs to it.
     */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Region 0 has to be re-assigned so that it starts past the prologue. */
    tcg_region_assign(s, 0);

    /* Tell gdb about whatever is left of the buffer after the prologue. */
    jit_start = tcg_splitwx_to_rx(region.after_prologue);
    tcg_register_jit(jit_start,
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the size (in bytes) of all translated code (i.e. from all regions)
|
||||||
|
* currently in the cache.
|
||||||
|
* See also: tcg_code_capacity()
|
||||||
|
* Do not confuse with tcg_current_code_size(); that one applies to a single
|
||||||
|
* TCG context.
|
||||||
|
*/
|
||||||
|
size_t tcg_code_size(void)
|
||||||
|
{
|
||||||
|
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||||
|
unsigned int i;
|
||||||
|
size_t total;
|
||||||
|
|
||||||
|
qemu_mutex_lock(®ion.lock);
|
||||||
|
total = region.agg_size_full;
|
||||||
|
for (i = 0; i < n_ctxs; i++) {
|
||||||
|
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||||
|
size_t size;
|
||||||
|
|
||||||
|
size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
|
||||||
|
g_assert(size <= s->code_gen_buffer_size);
|
||||||
|
total += size;
|
||||||
|
}
|
||||||
|
qemu_mutex_unlock(®ion.lock);
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the code capacity (in bytes) of the entire cache, i.e. including all
|
||||||
|
* regions.
|
||||||
|
* See also: tcg_code_size()
|
||||||
|
*/
|
||||||
|
size_t tcg_code_capacity(void)
|
||||||
|
{
|
||||||
|
size_t guard_size, capacity;
|
||||||
|
|
||||||
|
/* no need for synchronization; these variables are set at init time */
|
||||||
|
guard_size = region.stride - region.size;
|
||||||
|
capacity = region.total_size;
|
||||||
|
capacity -= (region.n - 1) * guard_size;
|
||||||
|
capacity -= region.n * TCG_HIGHWATER;
|
||||||
|
|
||||||
|
return capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t tcg_tb_phys_invalidate_count(void)
|
||||||
|
{
|
||||||
|
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||||
|
unsigned int i;
|
||||||
|
size_t total = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < n_ctxs; i++) {
|
||||||
|
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||||
|
|
||||||
|
total += qatomic_read(&s->tb_phys_invalidate_count);
|
||||||
|
}
|
||||||
|
return total;
|
||||||
|
}
|
|
@ -34,6 +34,7 @@
|
||||||
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
|
||||||
#define TCG_TARGET_NB_REGS 32
|
#define TCG_TARGET_NB_REGS 32
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TCG_REG_ZERO,
|
TCG_REG_ZERO,
|
||||||
|
|
|
@ -28,6 +28,9 @@
|
||||||
#define TCG_TARGET_INSN_UNIT_SIZE 2
|
#define TCG_TARGET_INSN_UNIT_SIZE 2
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
|
||||||
|
|
||||||
|
/* We have a +- 4GB range on the branches; leave some slop. */
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
|
||||||
|
|
||||||
typedef enum TCGReg {
|
typedef enum TCGReg {
|
||||||
TCG_REG_R0 = 0,
|
TCG_REG_R0 = 0,
|
||||||
TCG_REG_R1,
|
TCG_REG_R1,
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
|
||||||
#define TCG_TARGET_NB_REGS 32
|
#define TCG_TARGET_NB_REGS 32
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TCG_REG_G0 = 0,
|
TCG_REG_G0 = 0,
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* Internal declarations for Tiny Code Generator for QEMU
|
||||||
|
*
|
||||||
|
* Copyright (c) 2008 Fabrice Bellard
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TCG_INTERNAL_H
|
||||||
|
#define TCG_INTERNAL_H 1
|
||||||
|
|
||||||
|
#define TCG_HIGHWATER 1024
|
||||||
|
|
||||||
|
extern TCGContext tcg_init_ctx;
|
||||||
|
extern TCGContext **tcg_ctxs;
|
||||||
|
extern unsigned int tcg_cur_ctxs;
|
||||||
|
extern unsigned int tcg_max_ctxs;
|
||||||
|
|
||||||
|
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus);
|
||||||
|
bool tcg_region_alloc(TCGContext *s);
|
||||||
|
void tcg_region_initial_alloc(TCGContext *s);
|
||||||
|
void tcg_region_prologue_set(TCGContext *s);
|
||||||
|
|
||||||
|
#endif /* TCG_INTERNAL_H */
|
649
tcg/tcg.c
649
tcg/tcg.c
|
@ -43,11 +43,6 @@
|
||||||
#define NO_CPU_IO_DEFS
|
#define NO_CPU_IO_DEFS
|
||||||
|
|
||||||
#include "exec/exec-all.h"
|
#include "exec/exec-all.h"
|
||||||
|
|
||||||
#if !defined(CONFIG_USER_ONLY)
|
|
||||||
#include "hw/boards.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "tcg/tcg-op.h"
|
#include "tcg/tcg-op.h"
|
||||||
|
|
||||||
#if UINTPTR_MAX == UINT32_MAX
|
#if UINTPTR_MAX == UINT32_MAX
|
||||||
|
@ -63,6 +58,7 @@
|
||||||
|
|
||||||
#include "elf.h"
|
#include "elf.h"
|
||||||
#include "exec/log.h"
|
#include "exec/log.h"
|
||||||
|
#include "tcg-internal.h"
|
||||||
|
|
||||||
/* Forward declarations for functions declared in tcg-target.c.inc and
|
/* Forward declarations for functions declared in tcg-target.c.inc and
|
||||||
used here. */
|
used here. */
|
||||||
|
@ -153,10 +149,12 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
|
||||||
static int tcg_out_ldst_finalize(TCGContext *s);
|
static int tcg_out_ldst_finalize(TCGContext *s);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TCG_HIGHWATER 1024
|
TCGContext tcg_init_ctx;
|
||||||
|
__thread TCGContext *tcg_ctx;
|
||||||
|
|
||||||
static TCGContext **tcg_ctxs;
|
TCGContext **tcg_ctxs;
|
||||||
static unsigned int n_tcg_ctxs;
|
unsigned int tcg_cur_ctxs;
|
||||||
|
unsigned int tcg_max_ctxs;
|
||||||
TCGv_env cpu_env = 0;
|
TCGv_env cpu_env = 0;
|
||||||
const void *tcg_code_gen_epilogue;
|
const void *tcg_code_gen_epilogue;
|
||||||
uintptr_t tcg_splitwx_diff;
|
uintptr_t tcg_splitwx_diff;
|
||||||
|
@ -165,42 +163,6 @@ uintptr_t tcg_splitwx_diff;
|
||||||
tcg_prologue_fn *tcg_qemu_tb_exec;
|
tcg_prologue_fn *tcg_qemu_tb_exec;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct tcg_region_tree {
|
|
||||||
QemuMutex lock;
|
|
||||||
GTree *tree;
|
|
||||||
/* padding to avoid false sharing is computed at run-time */
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We divide code_gen_buffer into equally-sized "regions" that TCG threads
|
|
||||||
* dynamically allocate from as demand dictates. Given appropriate region
|
|
||||||
* sizing, this minimizes flushes even when some TCG threads generate a lot
|
|
||||||
* more code than others.
|
|
||||||
*/
|
|
||||||
struct tcg_region_state {
|
|
||||||
QemuMutex lock;
|
|
||||||
|
|
||||||
/* fields set at init time */
|
|
||||||
void *start;
|
|
||||||
void *start_aligned;
|
|
||||||
void *end;
|
|
||||||
size_t n;
|
|
||||||
size_t size; /* size of one region */
|
|
||||||
size_t stride; /* .size + guard size */
|
|
||||||
|
|
||||||
/* fields protected by the lock */
|
|
||||||
size_t current; /* current region index */
|
|
||||||
size_t agg_size_full; /* aggregate size of full regions */
|
|
||||||
};
|
|
||||||
|
|
||||||
static struct tcg_region_state region;
|
|
||||||
/*
|
|
||||||
* This is an array of struct tcg_region_tree's, with padding.
|
|
||||||
* We use void * to simplify the computation of region_trees[i]; each
|
|
||||||
* struct is found every tree_size bytes.
|
|
||||||
*/
|
|
||||||
static void *region_trees;
|
|
||||||
static size_t tree_size;
|
|
||||||
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
|
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
|
||||||
static TCGRegSet tcg_target_call_clobber_regs;
|
static TCGRegSet tcg_target_call_clobber_regs;
|
||||||
|
|
||||||
|
@ -457,456 +419,6 @@ static const TCGTargetOpDef constraint_sets[] = {
|
||||||
|
|
||||||
#include "tcg-target.c.inc"
|
#include "tcg-target.c.inc"
|
||||||
|
|
||||||
/* compare a pointer @ptr and a tb_tc @s */
|
|
||||||
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
|
|
||||||
{
|
|
||||||
if (ptr >= s->ptr + s->size) {
|
|
||||||
return 1;
|
|
||||||
} else if (ptr < s->ptr) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
|
|
||||||
{
|
|
||||||
const struct tb_tc *a = ap;
|
|
||||||
const struct tb_tc *b = bp;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When both sizes are set, we know this isn't a lookup.
|
|
||||||
* This is the most likely case: every TB must be inserted; lookups
|
|
||||||
* are a lot less frequent.
|
|
||||||
*/
|
|
||||||
if (likely(a->size && b->size)) {
|
|
||||||
if (a->ptr > b->ptr) {
|
|
||||||
return 1;
|
|
||||||
} else if (a->ptr < b->ptr) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
/* a->ptr == b->ptr should happen only on deletions */
|
|
||||||
g_assert(a->size == b->size);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* All lookups have either .size field set to 0.
|
|
||||||
* From the glib sources we see that @ap is always the lookup key. However
|
|
||||||
* the docs provide no guarantee, so we just mark this case as likely.
|
|
||||||
*/
|
|
||||||
if (likely(a->size == 0)) {
|
|
||||||
return ptr_cmp_tb_tc(a->ptr, b);
|
|
||||||
}
|
|
||||||
return ptr_cmp_tb_tc(b->ptr, a);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void tcg_region_trees_init(void)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
|
|
||||||
region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
|
||||||
|
|
||||||
qemu_mutex_init(&rt->lock);
|
|
||||||
rt->tree = g_tree_new(tb_tc_cmp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
|
|
||||||
{
|
|
||||||
size_t region_idx;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Like tcg_splitwx_to_rw, with no assert. The pc may come from
|
|
||||||
* a signal handler over which the caller has no control.
|
|
||||||
*/
|
|
||||||
if (!in_code_gen_buffer(p)) {
|
|
||||||
p -= tcg_splitwx_diff;
|
|
||||||
if (!in_code_gen_buffer(p)) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p < region.start_aligned) {
|
|
||||||
region_idx = 0;
|
|
||||||
} else {
|
|
||||||
ptrdiff_t offset = p - region.start_aligned;
|
|
||||||
|
|
||||||
if (offset > region.stride * (region.n - 1)) {
|
|
||||||
region_idx = region.n - 1;
|
|
||||||
} else {
|
|
||||||
region_idx = offset / region.stride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return region_trees + region_idx * tree_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void tcg_tb_insert(TranslationBlock *tb)
|
|
||||||
{
|
|
||||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
|
||||||
|
|
||||||
g_assert(rt != NULL);
|
|
||||||
qemu_mutex_lock(&rt->lock);
|
|
||||||
g_tree_insert(rt->tree, &tb->tc, tb);
|
|
||||||
qemu_mutex_unlock(&rt->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void tcg_tb_remove(TranslationBlock *tb)
|
|
||||||
{
|
|
||||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
|
||||||
|
|
||||||
g_assert(rt != NULL);
|
|
||||||
qemu_mutex_lock(&rt->lock);
|
|
||||||
g_tree_remove(rt->tree, &tb->tc);
|
|
||||||
qemu_mutex_unlock(&rt->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the TB 'tb' such that
|
|
||||||
* tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
|
|
||||||
* Return NULL if not found.
|
|
||||||
*/
|
|
||||||
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
|
|
||||||
{
|
|
||||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
|
|
||||||
TranslationBlock *tb;
|
|
||||||
struct tb_tc s = { .ptr = (void *)tc_ptr };
|
|
||||||
|
|
||||||
if (rt == NULL) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
qemu_mutex_lock(&rt->lock);
|
|
||||||
tb = g_tree_lookup(rt->tree, &s);
|
|
||||||
qemu_mutex_unlock(&rt->lock);
|
|
||||||
return tb;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void tcg_region_tree_lock_all(void)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
|
||||||
|
|
||||||
qemu_mutex_lock(&rt->lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void tcg_region_tree_unlock_all(void)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
|
||||||
|
|
||||||
qemu_mutex_unlock(&rt->lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
tcg_region_tree_lock_all();
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
|
||||||
|
|
||||||
g_tree_foreach(rt->tree, func, user_data);
|
|
||||||
}
|
|
||||||
tcg_region_tree_unlock_all();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t tcg_nb_tbs(void)
|
|
||||||
{
|
|
||||||
size_t nb_tbs = 0;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
tcg_region_tree_lock_all();
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
|
||||||
|
|
||||||
nb_tbs += g_tree_nnodes(rt->tree);
|
|
||||||
}
|
|
||||||
tcg_region_tree_unlock_all();
|
|
||||||
return nb_tbs;
|
|
||||||
}
|
|
||||||
|
|
||||||
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
|
|
||||||
{
|
|
||||||
TranslationBlock *tb = v;
|
|
||||||
|
|
||||||
tb_destroy(tb);
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void tcg_region_tree_reset_all(void)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
tcg_region_tree_lock_all();
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
|
||||||
|
|
||||||
g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
|
|
||||||
/* Increment the refcount first so that destroy acts as a reset */
|
|
||||||
g_tree_ref(rt->tree);
|
|
||||||
g_tree_destroy(rt->tree);
|
|
||||||
}
|
|
||||||
tcg_region_tree_unlock_all();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
|
|
||||||
{
|
|
||||||
void *start, *end;
|
|
||||||
|
|
||||||
start = region.start_aligned + curr_region * region.stride;
|
|
||||||
end = start + region.size;
|
|
||||||
|
|
||||||
if (curr_region == 0) {
|
|
||||||
start = region.start;
|
|
||||||
}
|
|
||||||
if (curr_region == region.n - 1) {
|
|
||||||
end = region.end;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pstart = start;
|
|
||||||
*pend = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void tcg_region_assign(TCGContext *s, size_t curr_region)
|
|
||||||
{
|
|
||||||
void *start, *end;
|
|
||||||
|
|
||||||
tcg_region_bounds(curr_region, &start, &end);
|
|
||||||
|
|
||||||
s->code_gen_buffer = start;
|
|
||||||
s->code_gen_ptr = start;
|
|
||||||
s->code_gen_buffer_size = end - start;
|
|
||||||
s->code_gen_highwater = end - TCG_HIGHWATER;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool tcg_region_alloc__locked(TCGContext *s)
|
|
||||||
{
|
|
||||||
if (region.current == region.n) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
tcg_region_assign(s, region.current);
|
|
||||||
region.current++;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Request a new region once the one in use has filled up.
|
|
||||||
* Returns true on error.
|
|
||||||
*/
|
|
||||||
static bool tcg_region_alloc(TCGContext *s)
|
|
||||||
{
|
|
||||||
bool err;
|
|
||||||
/* read the region size now; alloc__locked will overwrite it on success */
|
|
||||||
size_t size_full = s->code_gen_buffer_size;
|
|
||||||
|
|
||||||
qemu_mutex_lock(®ion.lock);
|
|
||||||
err = tcg_region_alloc__locked(s);
|
|
||||||
if (!err) {
|
|
||||||
region.agg_size_full += size_full - TCG_HIGHWATER;
|
|
||||||
}
|
|
||||||
qemu_mutex_unlock(®ion.lock);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Perform a context's first region allocation.
|
|
||||||
* This function does _not_ increment region.agg_size_full.
|
|
||||||
*/
|
|
||||||
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
|
|
||||||
{
|
|
||||||
return tcg_region_alloc__locked(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Call from a safe-work context */
|
|
||||||
void tcg_region_reset_all(void)
|
|
||||||
{
|
|
||||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
qemu_mutex_lock(®ion.lock);
|
|
||||||
region.current = 0;
|
|
||||||
region.agg_size_full = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < n_ctxs; i++) {
|
|
||||||
TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
|
||||||
bool err = tcg_region_initial_alloc__locked(s);
|
|
||||||
|
|
||||||
g_assert(!err);
|
|
||||||
}
|
|
||||||
qemu_mutex_unlock(®ion.lock);
|
|
||||||
|
|
||||||
tcg_region_tree_reset_all();
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_USER_ONLY
|
|
||||||
static size_t tcg_n_regions(void)
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* It is likely that some vCPUs will translate more code than others, so we
|
|
||||||
* first try to set more regions than max_cpus, with those regions being of
|
|
||||||
* reasonable size. If that's not possible we make do by evenly dividing
|
|
||||||
* the code_gen_buffer among the vCPUs.
|
|
||||||
*/
|
|
||||||
static size_t tcg_n_regions(void)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
/* Use a single region if all we have is one vCPU thread */
|
|
||||||
#if !defined(CONFIG_USER_ONLY)
|
|
||||||
MachineState *ms = MACHINE(qdev_get_machine());
|
|
||||||
unsigned int max_cpus = ms->smp.max_cpus;
|
|
||||||
#endif
|
|
||||||
if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Try to have more regions than max_cpus, with each region being >= 2 MB */
|
|
||||||
for (i = 8; i > 0; i--) {
|
|
||||||
size_t regions_per_thread = i;
|
|
||||||
size_t region_size;
|
|
||||||
|
|
||||||
region_size = tcg_init_ctx.code_gen_buffer_size;
|
|
||||||
region_size /= max_cpus * regions_per_thread;
|
|
||||||
|
|
||||||
if (region_size >= 2 * 1024u * 1024) {
|
|
||||||
return max_cpus * regions_per_thread;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* If we can't, then just allocate one region per vCPU thread */
|
|
||||||
return max_cpus;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initializes region partitioning.
|
|
||||||
*
|
|
||||||
* Called at init time from the parent thread (i.e. the one calling
|
|
||||||
* tcg_context_init), after the target's TCG globals have been set.
|
|
||||||
*
|
|
||||||
* Region partitioning works by splitting code_gen_buffer into separate regions,
|
|
||||||
* and then assigning regions to TCG threads so that the threads can translate
|
|
||||||
* code in parallel without synchronization.
|
|
||||||
*
|
|
||||||
* In softmmu the number of TCG threads is bounded by max_cpus, so we use at
|
|
||||||
* least max_cpus regions in MTTCG. In !MTTCG we use a single region.
|
|
||||||
* Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
|
|
||||||
* must have been parsed before calling this function, since it calls
|
|
||||||
* qemu_tcg_mttcg_enabled().
|
|
||||||
*
|
|
||||||
* In user-mode we use a single region. Having multiple regions in user-mode
|
|
||||||
* is not supported, because the number of vCPU threads (recall that each thread
|
|
||||||
* spawned by the guest corresponds to a vCPU thread) is only bounded by the
|
|
||||||
* OS, and usually this number is huge (tens of thousands is not uncommon).
|
|
||||||
* Thus, given this large bound on the number of vCPU threads and the fact
|
|
||||||
* that code_gen_buffer is allocated at compile-time, we cannot guarantee
|
|
||||||
* the availability of at least one region per vCPU thread.
|
|
||||||
*
|
|
||||||
* However, this user-mode limitation is unlikely to be a significant problem
|
|
||||||
* in practice. Multi-threaded guests share most if not all of their translated
|
|
||||||
* code, which makes parallel code generation less appealing than in softmmu.
|
|
||||||
*/
|
|
||||||
void tcg_region_init(void)
|
|
||||||
{
|
|
||||||
void *buf = tcg_init_ctx.code_gen_buffer;
|
|
||||||
void *aligned;
|
|
||||||
size_t size = tcg_init_ctx.code_gen_buffer_size;
|
|
||||||
size_t page_size = qemu_real_host_page_size;
|
|
||||||
size_t region_size;
|
|
||||||
size_t n_regions;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
n_regions = tcg_n_regions();
|
|
||||||
|
|
||||||
/* The first region will be 'aligned - buf' bytes larger than the others */
|
|
||||||
aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
|
|
||||||
g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
|
|
||||||
/*
|
|
||||||
* Make region_size a multiple of page_size, using aligned as the start.
|
|
||||||
* As a result of this we might end up with a few extra pages at the end of
|
|
||||||
* the buffer; we will assign those to the last region.
|
|
||||||
*/
|
|
||||||
region_size = (size - (aligned - buf)) / n_regions;
|
|
||||||
region_size = QEMU_ALIGN_DOWN(region_size, page_size);
|
|
||||||
|
|
||||||
/* A region must have at least 2 pages; one code, one guard */
|
|
||||||
g_assert(region_size >= 2 * page_size);
|
|
||||||
|
|
||||||
/* init the region struct */
|
|
||||||
qemu_mutex_init(&region.lock);
|
|
||||||
region.n = n_regions;
|
|
||||||
region.size = region_size - page_size;
|
|
||||||
region.stride = region_size;
|
|
||||||
region.start = buf;
|
|
||||||
region.start_aligned = aligned;
|
|
||||||
/* page-align the end, since its last page will be a guard page */
|
|
||||||
region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
|
|
||||||
/* account for that last guard page */
|
|
||||||
region.end -= page_size;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set guard pages in the rw buffer, as that's the one into which
|
|
||||||
* buffer overruns could occur. Do not set guard pages in the rx
|
|
||||||
* buffer -- let that one use hugepages throughout.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < region.n; i++) {
|
|
||||||
void *start, *end;
|
|
||||||
|
|
||||||
tcg_region_bounds(i, &start, &end);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
|
|
||||||
* rejects a permission change from RWX -> NONE. Guard pages are
|
|
||||||
* nice for bug detection but are not essential; ignore any failure.
|
|
||||||
*/
|
|
||||||
(void)qemu_mprotect_none(end, page_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
tcg_region_trees_init();
|
|
||||||
|
|
||||||
/* In user-mode we support only one ctx, so do the initial allocation now */
|
|
||||||
#ifdef CONFIG_USER_ONLY
|
|
||||||
{
|
|
||||||
bool err = tcg_region_initial_alloc__locked(tcg_ctx);
|
|
||||||
|
|
||||||
g_assert(!err);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_TCG
|
|
||||||
const void *tcg_splitwx_to_rx(void *rw)
|
|
||||||
{
|
|
||||||
/* Pass NULL pointers unchanged. */
|
|
||||||
if (rw) {
|
|
||||||
g_assert(in_code_gen_buffer(rw));
|
|
||||||
rw += tcg_splitwx_diff;
|
|
||||||
}
|
|
||||||
return rw;
|
|
||||||
}
|
|
||||||
|
|
||||||
void *tcg_splitwx_to_rw(const void *rx)
|
|
||||||
{
|
|
||||||
/* Pass NULL pointers unchanged. */
|
|
||||||
if (rx) {
|
|
||||||
rx -= tcg_splitwx_diff;
|
|
||||||
/* Assert that we end with a pointer in the rw region. */
|
|
||||||
g_assert(in_code_gen_buffer(rx));
|
|
||||||
}
|
|
||||||
return (void *)rx;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_DEBUG_TCG */
|
|
||||||
|
|
||||||
static void alloc_tcg_plugin_context(TCGContext *s)
|
static void alloc_tcg_plugin_context(TCGContext *s)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_PLUGIN
|
#ifdef CONFIG_PLUGIN
|
||||||
|
@ -939,10 +451,8 @@ void tcg_register_thread(void)
|
||||||
#else
|
#else
|
||||||
void tcg_register_thread(void)
|
void tcg_register_thread(void)
|
||||||
{
|
{
|
||||||
MachineState *ms = MACHINE(qdev_get_machine());
|
|
||||||
TCGContext *s = g_malloc(sizeof(*s));
|
TCGContext *s = g_malloc(sizeof(*s));
|
||||||
unsigned int i, n;
|
unsigned int i, n;
|
||||||
bool err;
|
|
||||||
|
|
||||||
*s = tcg_init_ctx;
|
*s = tcg_init_ctx;
|
||||||
|
|
||||||
|
@ -956,79 +466,19 @@ void tcg_register_thread(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Claim an entry in tcg_ctxs */
|
/* Claim an entry in tcg_ctxs */
|
||||||
n = qatomic_fetch_inc(&n_tcg_ctxs);
|
n = qatomic_fetch_inc(&tcg_cur_ctxs);
|
||||||
g_assert(n < ms->smp.max_cpus);
|
g_assert(n < tcg_max_ctxs);
|
||||||
qatomic_set(&tcg_ctxs[n], s);
|
qatomic_set(&tcg_ctxs[n], s);
|
||||||
|
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
alloc_tcg_plugin_context(s);
|
alloc_tcg_plugin_context(s);
|
||||||
|
tcg_region_initial_alloc(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
tcg_ctx = s;
|
tcg_ctx = s;
|
||||||
qemu_mutex_lock(&region.lock);
|
|
||||||
err = tcg_region_initial_alloc__locked(tcg_ctx);
|
|
||||||
g_assert(!err);
|
|
||||||
qemu_mutex_unlock(&region.lock);
|
|
||||||
}
|
}
|
||||||
#endif /* !CONFIG_USER_ONLY */
|
#endif /* !CONFIG_USER_ONLY */
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns the size (in bytes) of all translated code (i.e. from all regions)
|
|
||||||
* currently in the cache.
|
|
||||||
* See also: tcg_code_capacity()
|
|
||||||
* Do not confuse with tcg_current_code_size(); that one applies to a single
|
|
||||||
* TCG context.
|
|
||||||
*/
|
|
||||||
size_t tcg_code_size(void)
|
|
||||||
{
|
|
||||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
|
||||||
unsigned int i;
|
|
||||||
size_t total;
|
|
||||||
|
|
||||||
qemu_mutex_lock(&region.lock);
|
|
||||||
total = region.agg_size_full;
|
|
||||||
for (i = 0; i < n_ctxs; i++) {
|
|
||||||
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
|
||||||
size_t size;
|
|
||||||
|
|
||||||
size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
|
|
||||||
g_assert(size <= s->code_gen_buffer_size);
|
|
||||||
total += size;
|
|
||||||
}
|
|
||||||
qemu_mutex_unlock(&region.lock);
|
|
||||||
return total;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns the code capacity (in bytes) of the entire cache, i.e. including all
|
|
||||||
* regions.
|
|
||||||
* See also: tcg_code_size()
|
|
||||||
*/
|
|
||||||
size_t tcg_code_capacity(void)
|
|
||||||
{
|
|
||||||
size_t guard_size, capacity;
|
|
||||||
|
|
||||||
/* no need for synchronization; these variables are set at init time */
|
|
||||||
guard_size = region.stride - region.size;
|
|
||||||
capacity = region.end + guard_size - region.start;
|
|
||||||
capacity -= region.n * (guard_size + TCG_HIGHWATER);
|
|
||||||
return capacity;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t tcg_tb_phys_invalidate_count(void)
|
|
||||||
{
|
|
||||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
|
||||||
unsigned int i;
|
|
||||||
size_t total = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < n_ctxs; i++) {
|
|
||||||
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
|
||||||
|
|
||||||
total += qatomic_read(&s->tb_phys_invalidate_count);
|
|
||||||
}
|
|
||||||
return total;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pool based memory allocation */
|
/* pool based memory allocation */
|
||||||
void *tcg_malloc_internal(TCGContext *s, int size)
|
void *tcg_malloc_internal(TCGContext *s, int size)
|
||||||
{
|
{
|
||||||
|
@ -1101,8 +551,9 @@ static void process_op_defs(TCGContext *s);
|
||||||
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
|
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
|
||||||
TCGReg reg, const char *name);
|
TCGReg reg, const char *name);
|
||||||
|
|
||||||
void tcg_context_init(TCGContext *s)
|
static void tcg_context_init(unsigned max_cpus)
|
||||||
{
|
{
|
||||||
|
TCGContext *s = &tcg_init_ctx;
|
||||||
int op, total_args, n, i;
|
int op, total_args, n, i;
|
||||||
TCGOpDef *def;
|
TCGOpDef *def;
|
||||||
TCGArgConstraint *args_ct;
|
TCGArgConstraint *args_ct;
|
||||||
|
@ -1167,11 +618,11 @@ void tcg_context_init(TCGContext *s)
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_USER_ONLY
|
#ifdef CONFIG_USER_ONLY
|
||||||
tcg_ctxs = &tcg_ctx;
|
tcg_ctxs = &tcg_ctx;
|
||||||
n_tcg_ctxs = 1;
|
tcg_cur_ctxs = 1;
|
||||||
|
tcg_max_ctxs = 1;
|
||||||
#else
|
#else
|
||||||
MachineState *ms = MACHINE(qdev_get_machine());
|
tcg_max_ctxs = max_cpus;
|
||||||
unsigned int max_cpus = ms->smp.max_cpus;
|
tcg_ctxs = g_new0(TCGContext *, max_cpus);
|
||||||
tcg_ctxs = g_new(TCGContext *, max_cpus);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
|
tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
|
||||||
|
@ -1179,6 +630,12 @@ void tcg_context_init(TCGContext *s)
|
||||||
cpu_env = temp_tcgv_ptr(ts);
|
cpu_env = temp_tcgv_ptr(ts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
|
||||||
|
{
|
||||||
|
tcg_context_init(max_cpus);
|
||||||
|
tcg_region_init(tb_size, splitwx, max_cpus);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate TBs right before their corresponding translated code, making
|
* Allocate TBs right before their corresponding translated code, making
|
||||||
* sure that TBs and code are on different cache lines.
|
* sure that TBs and code are on different cache lines.
|
||||||
|
@ -1206,32 +663,16 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s)
|
||||||
|
|
||||||
void tcg_prologue_init(TCGContext *s)
|
void tcg_prologue_init(TCGContext *s)
|
||||||
{
|
{
|
||||||
size_t prologue_size, total_size;
|
size_t prologue_size;
|
||||||
void *buf0, *buf1;
|
|
||||||
|
|
||||||
/* Put the prologue at the beginning of code_gen_buffer. */
|
s->code_ptr = s->code_gen_ptr;
|
||||||
buf0 = s->code_gen_buffer;
|
s->code_buf = s->code_gen_ptr;
|
||||||
total_size = s->code_gen_buffer_size;
|
|
||||||
s->code_ptr = buf0;
|
|
||||||
s->code_buf = buf0;
|
|
||||||
s->data_gen_ptr = NULL;
|
s->data_gen_ptr = NULL;
|
||||||
|
|
||||||
/*
|
|
||||||
* The region trees are not yet configured, but tcg_splitwx_to_rx
|
|
||||||
* needs the bounds for an assert.
|
|
||||||
*/
|
|
||||||
region.start = buf0;
|
|
||||||
region.end = buf0 + total_size;
|
|
||||||
|
|
||||||
#ifndef CONFIG_TCG_INTERPRETER
|
#ifndef CONFIG_TCG_INTERPRETER
|
||||||
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
|
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Compute a high-water mark, at which we voluntarily flush the buffer
|
|
||||||
and start over. The size here is arbitrary, significantly larger
|
|
||||||
than we expect the code generation for any one opcode to require. */
|
|
||||||
s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
|
|
||||||
|
|
||||||
#ifdef TCG_TARGET_NEED_POOL_LABELS
|
#ifdef TCG_TARGET_NEED_POOL_LABELS
|
||||||
s->pool_labels = NULL;
|
s->pool_labels = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1248,32 +689,25 @@ void tcg_prologue_init(TCGContext *s)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
buf1 = s->code_ptr;
|
prologue_size = tcg_current_code_size(s);
|
||||||
|
|
||||||
#ifndef CONFIG_TCG_INTERPRETER
|
#ifndef CONFIG_TCG_INTERPRETER
|
||||||
flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
|
flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
|
||||||
tcg_ptr_byte_diff(buf1, buf0));
|
(uintptr_t)s->code_buf, prologue_size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Deduct the prologue from the buffer. */
|
tcg_region_prologue_set(s);
|
||||||
prologue_size = tcg_current_code_size(s);
|
|
||||||
s->code_gen_ptr = buf1;
|
|
||||||
s->code_gen_buffer = buf1;
|
|
||||||
s->code_buf = buf1;
|
|
||||||
total_size -= prologue_size;
|
|
||||||
s->code_gen_buffer_size = total_size;
|
|
||||||
|
|
||||||
tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
|
|
||||||
|
|
||||||
#ifdef DEBUG_DISAS
|
#ifdef DEBUG_DISAS
|
||||||
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
|
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
|
||||||
FILE *logfile = qemu_log_lock();
|
FILE *logfile = qemu_log_lock();
|
||||||
qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
|
qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
|
||||||
if (s->data_gen_ptr) {
|
if (s->data_gen_ptr) {
|
||||||
size_t code_size = s->data_gen_ptr - buf0;
|
size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
|
||||||
size_t data_size = prologue_size - code_size;
|
size_t data_size = prologue_size - code_size;
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
log_disas(buf0, code_size);
|
log_disas(s->code_gen_ptr, code_size);
|
||||||
|
|
||||||
for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
|
for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
|
||||||
if (sizeof(tcg_target_ulong) == 8) {
|
if (sizeof(tcg_target_ulong) == 8) {
|
||||||
|
@ -1287,7 +721,7 @@ void tcg_prologue_init(TCGContext *s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log_disas(buf0, prologue_size);
|
log_disas(s->code_gen_ptr, prologue_size);
|
||||||
}
|
}
|
||||||
qemu_log("\n");
|
qemu_log("\n");
|
||||||
qemu_log_flush();
|
qemu_log_flush();
|
||||||
|
@ -2649,6 +2083,19 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tcg_remove_ops_after(TCGOp *op)
|
||||||
|
{
|
||||||
|
TCGContext *s = tcg_ctx;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
TCGOp *last = tcg_last_op();
|
||||||
|
if (last == op) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
tcg_op_remove(s, last);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static TCGOp *tcg_op_alloc(TCGOpcode opc)
|
static TCGOp *tcg_op_alloc(TCGOpcode opc)
|
||||||
{
|
{
|
||||||
TCGContext *s = tcg_ctx;
|
TCGContext *s = tcg_ctx;
|
||||||
|
@ -4480,7 +3927,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
|
||||||
static inline
|
static inline
|
||||||
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
|
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
|
||||||
{
|
{
|
||||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
for (i = 0; i < n_ctxs; i++) {
|
for (i = 0; i < n_ctxs; i++) {
|
||||||
|
@ -4543,7 +3990,7 @@ void tcg_dump_op_count(void)
|
||||||
|
|
||||||
int64_t tcg_cpu_exec_time(void)
|
int64_t tcg_cpu_exec_time(void)
|
||||||
{
|
{
|
||||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
int64_t ret = 0;
|
int64_t ret = 0;
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
#define TCG_TARGET_INTERPRETER 1
|
#define TCG_TARGET_INTERPRETER 1
|
||||||
#define TCG_TARGET_INSN_UNIT_SIZE 1
|
#define TCG_TARGET_INSN_UNIT_SIZE 1
|
||||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
|
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
|
||||||
|
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
|
||||||
|
|
||||||
#if UINTPTR_MAX == UINT32_MAX
|
#if UINTPTR_MAX == UINT32_MAX
|
||||||
# define TCG_TARGET_REG_BITS 32
|
# define TCG_TARGET_REG_BITS 32
|
||||||
|
|
|
@ -97,6 +97,15 @@ static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int qemu_mprotect_rw(void *addr, size_t size)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
return qemu_mprotect__osdep(addr, size, PAGE_READWRITE);
|
||||||
|
#else
|
||||||
|
return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
int qemu_mprotect_rwx(void *addr, size_t size)
|
int qemu_mprotect_rwx(void *addr, size_t size)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
Loading…
Reference in New Issue