diff --git a/include/qemu/qsp.h b/include/qemu/qsp.h index 209480b687..f8c6c9648a 100644 --- a/include/qemu/qsp.h +++ b/include/qemu/qsp.h @@ -24,5 +24,6 @@ void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max, bool qsp_is_enabled(void); void qsp_enable(void); void qsp_disable(void); +void qsp_reset(void); #endif /* QEMU_QSP_H */ diff --git a/util/qsp.c b/util/qsp.c index 80dbd4c368..4b1308b081 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -47,6 +47,11 @@ * an intermediate hash table. This would simplify the code only slightly, but * would perform badly if there were many threads and objects to track. * + * - Wrap operations on qsp entries with RCU read-side critical sections, so + * that qsp_reset() can delete entries. Unfortunately, the overhead of calling + * rcu_read_lock/unlock slows down atomic_add-bench -m by 24%. Having + * a snapshot that is updated on qsp_reset() avoids this overhead. + * * Related Work: * - Lennart Poettering's mutrace: http://0pointer.de/blog/projects/mutrace.html * - Lozi, David, Thomas, Lawall and Muller. "Remote Core Locking: Migrating @@ -57,6 +62,7 @@ #include "qemu/thread.h" #include "qemu/timer.h" #include "qemu/qht.h" +#include "qemu/rcu.h" #include "exec/tb-hash-xx.h" enum QSPType { @@ -88,6 +94,12 @@ struct QSPEntry { }; typedef struct QSPEntry QSPEntry; +struct QSPSnapshot { + struct rcu_head rcu; + struct qht ht; +}; +typedef struct QSPSnapshot QSPSnapshot; + /* initial sizing for hash tables */ #define QSP_INITIAL_SIZE 64 @@ -107,6 +119,7 @@ static __thread int qsp_thread; static struct qht qsp_callsite_ht; static struct qht qsp_ht; +static QSPSnapshot *qsp_snapshot; static bool qsp_initialized, qsp_initializing; static const char * const qsp_typenames[] = { @@ -505,15 +518,69 @@ static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up) qsp_entry_aggregate(agg, e); } +static void qsp_iter_diff(struct qht *orig, void *p, uint32_t hash, void *htp) +{ + struct qht *ht = htp; + QSPEntry *old = p; + QSPEntry *new; + + new = qht_lookup(ht, old, hash); + /* entries are never deleted, so we must have this one */ + g_assert(new != NULL); + /* our reading of the stats happened after the snapshot was taken */ + g_assert(new->n_acqs >= old->n_acqs); + g_assert(new->ns >= old->ns); + + new->n_acqs -= old->n_acqs; + new->ns -= old->ns; + + /* No point in reporting an empty entry */ + if (new->n_acqs == 0 && new->ns == 0) { + bool removed = qht_remove(ht, new, hash); + + g_assert(removed); + g_free(new); + } +} + +static void qsp_diff(struct qht *orig, struct qht *new) +{ + qht_iter(orig, qsp_iter_diff, new); +} + +static void qsp_ht_delete(struct qht *ht, void *p, uint32_t h, void *htp) +{ + g_free(p); +} + static void qsp_mktree(GTree *tree) { + QSPSnapshot *snap; struct qht ht; + /* + * First, see if there's a prior snapshot, so that we read the global hash + * table _after_ the snapshot has been created, which guarantees that + * the entries we'll read will be a superset of the snapshot's entries. + * + * We must remain in an RCU read-side critical section until we're done + * with the snapshot. + */ + rcu_read_lock(); + snap = atomic_rcu_read(&qsp_snapshot); + /* Aggregate all results from the global hash table into a local one */ qht_init(&ht, qsp_entry_no_thread_cmp, QSP_INITIAL_SIZE, QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES); qht_iter(&qsp_ht, qsp_aggregate, &ht); + /* compute the difference wrt the snapshot, if any */ + if (snap) { + qsp_diff(&snap->ht, &ht); + } + /* done with the snapshot; RCU can reclaim it */ + rcu_read_unlock(); + /* sort the hash table elements by using a tree */ qht_iter(&ht, qsp_sort, tree); @@ -652,3 +719,30 @@ void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max, pr_report(&rep, f, cpu_fprintf); report_destroy(&rep); } + +static void qsp_snapshot_destroy(QSPSnapshot *snap) +{ + qht_iter(&snap->ht, qsp_ht_delete, NULL); + qht_destroy(&snap->ht); + g_free(snap); +} + +void qsp_reset(void) +{ + QSPSnapshot *new = g_new(QSPSnapshot, 1); + QSPSnapshot *old; + + qsp_init(); + + qht_init(&new->ht, qsp_entry_cmp, QSP_INITIAL_SIZE, + QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES); + + /* take a snapshot of the current state */ + qht_iter(&qsp_ht, qsp_aggregate, &new->ht); + + /* replace the previous snapshot, if any */ + old = atomic_xchg(&qsp_snapshot, new); + if (old) { + call_rcu(old, qsp_snapshot_destroy, rcu); + } +}