accel/tcg: Move HMP info jit and info opcount code

Move all of it into accel/tcg/monitor.c.  This puts everything
about tcg that is only used by the monitor in the same place.

Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-07-03 15:22:38 +02:00
parent 3e01f1147a
commit 24a4d59aa7
7 changed files with 154 additions and 158 deletions

View File

@ -321,21 +321,6 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
}
}
void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
CPUState *cpu;
size_t full = 0, part = 0, elide = 0;
CPU_FOREACH(cpu) {
full += qatomic_read(&cpu->neg.tlb.c.full_flush_count);
part += qatomic_read(&cpu->neg.tlb.c.part_flush_count);
elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count);
}
*pfull = full;
*ppart = part;
*pelide = elide;
}
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
uint16_t asked = data.host_int;

View File

@ -14,8 +14,6 @@
extern int64_t max_delay;
extern int64_t max_advance;
void dump_exec_info(GString *buf);
/*
* Return true if CS is not running in parallel with other cpus, either
* because there are no other cpus or we are within an exclusive context.

View File

@ -8,6 +8,7 @@
#include "qemu/osdep.h"
#include "qemu/accel.h"
#include "qemu/qht.h"
#include "qapi/error.h"
#include "qapi/type-helpers.h"
#include "qapi/qapi-commands-machine.h"
@ -17,6 +18,7 @@
#include "sysemu/tcg.h"
#include "tcg/tcg.h"
#include "internal-common.h"
#include "tb-context.h"
static void dump_drift_info(GString *buf)
@ -50,6 +52,153 @@ static void dump_accel_info(GString *buf)
one_insn_per_tb ? "on" : "off");
}
static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
uint32_t hgram_opts;
size_t hgram_bins;
char *hgram;
if (!hst.head_buckets) {
return;
}
g_string_append_printf(buf, "TB hash buckets %zu/%zu "
"(%0.2f%% head buckets used)\n",
hst.used_head_buckets, hst.head_buckets,
(double)hst.used_head_buckets /
hst.head_buckets * 100);
hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
hgram_opts |= QDIST_PR_NODECIMAL;
}
hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
"Histogram: %s\n",
qdist_avg(&hst.occupancy) * 100, hgram);
g_free(hgram);
hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
if (hgram_bins > 10) {
hgram_bins = 10;
} else {
hgram_bins = 0;
hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
}
hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
"Histogram: %s\n",
qdist_avg(&hst.chain), hgram);
g_free(hgram);
}
struct tb_tree_stats {
size_t nb_tbs;
size_t host_size;
size_t target_size;
size_t max_target_size;
size_t direct_jmp_count;
size_t direct_jmp2_count;
size_t cross_page;
};
static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
const TranslationBlock *tb = value;
struct tb_tree_stats *tst = data;
tst->nb_tbs++;
tst->host_size += tb->tc.size;
tst->target_size += tb->size;
if (tb->size > tst->max_target_size) {
tst->max_target_size = tb->size;
}
if (tb->page_addr[1] != -1) {
tst->cross_page++;
}
if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
tst->direct_jmp_count++;
if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
tst->direct_jmp2_count++;
}
}
return false;
}
static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
CPUState *cpu;
size_t full = 0, part = 0, elide = 0;
CPU_FOREACH(cpu) {
full += qatomic_read(&cpu->neg.tlb.c.full_flush_count);
part += qatomic_read(&cpu->neg.tlb.c.part_flush_count);
elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count);
}
*pfull = full;
*ppart = part;
*pelide = elide;
}
static void tcg_dump_info(GString *buf)
{
g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
static void dump_exec_info(GString *buf)
{
struct tb_tree_stats tst = {};
struct qht_stats hst;
size_t nb_tbs, flush_full, flush_part, flush_elide;
tcg_tb_foreach(tb_tree_stats_iter, &tst);
nb_tbs = tst.nb_tbs;
/* XXX: avoid using doubles ? */
g_string_append_printf(buf, "Translation buffer state:\n");
/*
* Report total code size including the padding and TB structs;
* otherwise users might think "-accel tcg,tb-size" is not honoured.
* For avg host size we use the precise numbers from tb_tree_stats though.
*/
g_string_append_printf(buf, "gen code size %zu/%zu\n",
tcg_code_size(), tcg_code_capacity());
g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
nb_tbs ? tst.target_size / nb_tbs : 0,
tst.max_target_size);
g_string_append_printf(buf, "TB avg host size %zu bytes "
"(expansion ratio: %0.1f)\n",
nb_tbs ? tst.host_size / nb_tbs : 0,
tst.target_size ?
(double)tst.host_size / tst.target_size : 0);
g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
tst.cross_page,
nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
"(2 jumps=%zu %zu%%)\n",
tst.direct_jmp_count,
nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
tst.direct_jmp2_count,
nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
qht_statistics_init(&tb_ctx.htable, &hst);
print_qht_statistics(hst, buf);
qht_statistics_destroy(&hst);
g_string_append_printf(buf, "\nStatistics:\n");
g_string_append_printf(buf, "TB flush count %u\n",
qatomic_read(&tb_ctx.tb_flush_count));
g_string_append_printf(buf, "TB invalidate count %u\n",
qatomic_read(&tb_ctx.tb_phys_invalidate_count));
tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
tcg_dump_info(buf);
}
HumanReadableText *qmp_x_query_jit(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
@ -66,6 +215,11 @@ HumanReadableText *qmp_x_query_jit(Error **errp)
return human_readable_text_from_str(buf);
}
static void tcg_dump_op_count(GString *buf)
{
g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
HumanReadableText *qmp_x_query_opcount(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");

View File

@ -645,133 +645,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
cpu_loop_exit_noexc(cpu);
}
static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
uint32_t hgram_opts;
size_t hgram_bins;
char *hgram;
if (!hst.head_buckets) {
return;
}
g_string_append_printf(buf, "TB hash buckets %zu/%zu "
"(%0.2f%% head buckets used)\n",
hst.used_head_buckets, hst.head_buckets,
(double)hst.used_head_buckets /
hst.head_buckets * 100);
hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
hgram_opts |= QDIST_PR_NODECIMAL;
}
hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
"Histogram: %s\n",
qdist_avg(&hst.occupancy) * 100, hgram);
g_free(hgram);
hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
if (hgram_bins > 10) {
hgram_bins = 10;
} else {
hgram_bins = 0;
hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
}
hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
"Histogram: %s\n",
qdist_avg(&hst.chain), hgram);
g_free(hgram);
}
struct tb_tree_stats {
size_t nb_tbs;
size_t host_size;
size_t target_size;
size_t max_target_size;
size_t direct_jmp_count;
size_t direct_jmp2_count;
size_t cross_page;
};
static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
const TranslationBlock *tb = value;
struct tb_tree_stats *tst = data;
tst->nb_tbs++;
tst->host_size += tb->tc.size;
tst->target_size += tb->size;
if (tb->size > tst->max_target_size) {
tst->max_target_size = tb->size;
}
if (tb_page_addr1(tb) != -1) {
tst->cross_page++;
}
if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
tst->direct_jmp_count++;
if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
tst->direct_jmp2_count++;
}
}
return false;
}
void dump_exec_info(GString *buf)
{
struct tb_tree_stats tst = {};
struct qht_stats hst;
size_t nb_tbs, flush_full, flush_part, flush_elide;
tcg_tb_foreach(tb_tree_stats_iter, &tst);
nb_tbs = tst.nb_tbs;
/* XXX: avoid using doubles ? */
g_string_append_printf(buf, "Translation buffer state:\n");
/*
* Report total code size including the padding and TB structs;
* otherwise users might think "-accel tcg,tb-size" is not honoured.
* For avg host size we use the precise numbers from tb_tree_stats though.
*/
g_string_append_printf(buf, "gen code size %zu/%zu\n",
tcg_code_size(), tcg_code_capacity());
g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
nb_tbs ? tst.target_size / nb_tbs : 0,
tst.max_target_size);
g_string_append_printf(buf, "TB avg host size %zu bytes "
"(expansion ratio: %0.1f)\n",
nb_tbs ? tst.host_size / nb_tbs : 0,
tst.target_size ?
(double)tst.host_size / tst.target_size : 0);
g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
tst.cross_page,
nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
"(2 jumps=%zu %zu%%)\n",
tst.direct_jmp_count,
nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
tst.direct_jmp2_count,
nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
qht_statistics_init(&tb_ctx.htable, &hst);
print_qht_statistics(hst, buf);
qht_statistics_destroy(&hst);
g_string_append_printf(buf, "\nStatistics:\n");
g_string_append_printf(buf, "TB flush count %u\n",
qatomic_read(&tb_ctx.tb_flush_count));
g_string_append_printf(buf, "TB invalidate count %u\n",
qatomic_read(&tb_ctx.tb_phys_invalidate_count));
tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
tcg_dump_info(buf);
}
#else /* CONFIG_USER_ONLY */
void cpu_interrupt(CPUState *cpu, int mask)

View File

@ -26,6 +26,5 @@
/* cputlb.c */
void tlb_protect_code(ram_addr_t ram_addr);
void tlb_unprotect_code(ram_addr_t ram_addr);
void tlb_flush_counts(size_t *full, size_t *part, size_t *elide);
#endif
#endif

View File

@ -846,9 +846,6 @@ static inline TCGv_ptr tcg_temp_new_ptr(void)
return temp_tcgv_ptr(t);
}
void tcg_dump_info(GString *buf);
void tcg_dump_op_count(GString *buf);
#define TCG_CT_CONST 1 /* any constant of register size */
typedef struct TCGArgConstraint {

View File

@ -5927,11 +5927,6 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
void tcg_dump_op_count(GString *buf)
{
g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
int i, start_words, num_insns;
@ -6128,11 +6123,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
return tcg_current_code_size(s);
}
void tcg_dump_info(GString *buf)
{
g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things: