/*
 * qemu-e2k/tests/atomic_add-bench.c
 *
 * Last change: Emilio G. Cota, commit 9d5cff3df5, 2018-08-23 18:46:25 +02:00
 *
 *   tests/atomic_add-bench: add -p to enable sync profiler
 *
 *   When used together with -m, this allows us to benchmark the
 *   profiler's performance impact on qemu_mutex_lock.
 *
 *   Signed-off-by: Emilio G. Cota <cota@braap.org>
 *   Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 *
 * (183 lines, 4.2 KiB, C)
 */

#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/host-utils.h"
#include "qemu/processor.h"
/*
 * Per-worker state, one entry per benchmark thread.
 * Each entry is aligned to 64 bytes (presumably one cache line, so that
 * workers do not false-share their PRNG state -- confirm against the
 * definition of QEMU_ALIGNED).
 */
struct thread_info {
/* xorshift64* PRNG state; advanced once per operation by thread_func() */
uint64_t r;
} QEMU_ALIGNED(64);
/*
 * One counter slot of the shared array the workers increment.
 * Aligned to 64 bytes (presumably so every slot sits on its own cache
 * line -- confirm against the definition of QEMU_ALIGNED).
 */
struct count {
/* protects val when the benchmark runs in mutex mode (-m) */
QemuMutex lock;
/* number of increments applied to this slot */
unsigned long val;
} QEMU_ALIGNED(64);
/* Worker thread handles, n_threads entries; allocated by create_threads() */
static QemuThread *threads;
/* Per-worker state passed to thread_func(), n_threads entries */
static struct thread_info *th_info;
/* Number of worker threads (-n) */
static unsigned int n_threads = 1;
/* Incremented by each worker on entry; run_test() waits for it to reach n_threads */
static unsigned int n_ready_threads;
/* Shared counter array, range entries */
static struct count *counts;
/* Length of the measurement window in seconds (-d) */
static unsigned int duration = 1;
/*
 * Number of counter slots (-r). thread_func() masks with (range - 1) to pick
 * a slot, so this has to be a power of two.
 */
static unsigned int range = 1024;
/* true: lock + plain increment; false: atomic increment (-m) */
static bool use_mutex;
/* Set by run_test() to release the workers from their start spin loop */
static bool test_start;
/* Set by run_test() to make the workers leave their main loop */
static bool test_stop;
/* Option summary printed by usage_complete(); one line per command-line flag */
static const char commands_string[] =
" -n = number of threads\n"
" -m = use mutexes instead of atomic increments\n"
" -p = enable sync profiler\n"
" -d = duration in seconds\n"
" -r = range (will be rounded up to pow2)";
/*
 * Print the usage banner followed by the option summary to stderr.
 *
 * @argv: the program's argument vector; only argv[0] (the program name)
 *        is used.
 */
static void usage_complete(char *argv[])
{
    const char *prog = argv[0];
    FILE *out = stderr;

    fprintf(out, "Usage: %s [options]\n", prog);
    fprintf(out, "options:\n%s\n", commands_string);
}
/*
 * One step of the xorshift64* pseudo-random generator, see
 * https://en.wikipedia.org/wiki/Xorshift
 *
 * Chosen over rand_r() because it is faster and yields a full 64-bit
 * value (RAND_MAX is only guaranteed to be at least 32767).
 *
 * @x: current generator state
 *
 * Returns the scrambled state; callers feed it back in as the next @x.
 * A state of 0 maps to 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);

    x = x ^ (x >> 12); /* a */
    x = x ^ (x << 25); /* b */
    x = x ^ (x >> 27); /* c */

    return x * multiplier;
}
static void *thread_func(void *arg)
{
struct thread_info *info = arg;
atomic_inc(&n_ready_threads);
while (!atomic_read(&test_start)) {
cpu_relax();
}
while (!atomic_read(&test_stop)) {
unsigned int index;
info->r = xorshift64star(info->r);
index = info->r & (range - 1);
if (use_mutex) {
qemu_mutex_lock(&counts[index].lock);
counts[index].val += 1;
qemu_mutex_unlock(&counts[index].lock);
} else {
atomic_inc(&counts[index].val);
}
}
return NULL;
}
/*
 * Drive one benchmark run: wait until every worker has checked in, open
 * the start gate, let the workers run for `duration` seconds, then raise
 * the stop flag and join all of them.
 */
static void run_test(void)
{
    unsigned int remaining = duration;
    unsigned int i;

    while (atomic_read(&n_ready_threads) != n_threads) {
        cpu_relax();
    }
    atomic_set(&test_start, true);

    /*
     * sleep() returns the number of unslept seconds when a signal cuts it
     * short. Resume with that remainder; restarting with the full duration
     * would stretch the measurement window beyond what pr_stats() divides by.
     */
    while (remaining) {
        remaining = sleep(remaining);
    }

    atomic_set(&test_stop, true);

    for (i = 0; i < n_threads; i++) {
        qemu_thread_join(&threads[i]);
    }
}
/*
 * Allocate the benchmark state and launch the workers.
 *
 * Sets up the thread handle array, the per-thread info array and the
 * zeroed, 64-byte-aligned counter array (one initialized mutex per slot),
 * then starts n_threads joinable threads running thread_func(). Each
 * worker's PRNG is seeded from its 1-based index XORed with the current
 * time.
 */
static void create_threads(void)
{
    const size_t counts_size = sizeof(*counts) * range;
    unsigned int n;

    threads = g_new(QemuThread, n_threads);
    th_info = g_new(struct thread_info, n_threads);

    counts = qemu_memalign(64, counts_size);
    memset(counts, 0, counts_size);
    for (n = 0; n < range; n++) {
        qemu_mutex_init(&counts[n].lock);
    }

    for (n = 0; n < n_threads; n++) {
        th_info[n].r = (n + 1) ^ time(NULL);
        qemu_thread_create(&threads[n], NULL, thread_func, &th_info[n],
                           QEMU_THREAD_JOINABLE);
    }
}
/* Print the effective benchmark parameters (threads, duration, range) to stdout. */
static void pr_params(void)
{
    printf("Parameters:\n"
           " # of threads: %u\n"
           " duration: %u\n"
           " ops' range: %u\n",
           n_threads, duration, range);
}
/*
 * Sum all counter slots and print the achieved throughput to stdout,
 * both in total and per thread, in millions of operations per second.
 *
 * Expected to run after the workers have been joined, since the counters
 * are read without locking or atomics.
 */
static void pr_stats(void)
{
    unsigned long long val = 0;
    unsigned int i;
    double tx;

    for (i = 0; i < range; i++) {
        val += counts[i].val;
    }

    /*
     * Divide in floating point: an integer val / duration would truncate
     * the rate and would be a division by zero (undefined behavior,
     * typically SIGFPE) if duration were 0.
     */
    tx = (double)val / duration / 1e6;

    printf("Results:\n");
    printf("Duration: %u s\n", duration);
    printf(" Throughput: %.2f Mops/s\n", tx);
    printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads);
}
/*
 * Largest value accepted for -d, -n and -r. Keeping it at 2^30 guarantees
 * that rounding the range up to a power of two still fits in an
 * unsigned int, and that strtol()'s overflow result (LONG_MAX) is rejected
 * by the plain range check.
 */
enum { OPT_VALUE_MAX = 1 << 30 };

/*
 * Parse the argument of option -@opt as a decimal integer in
 * [1, OPT_VALUE_MAX].
 *
 * @argv: the program's argument vector, used for diagnostics
 * @opt:  option letter the value belongs to
 * @text: option argument as delivered by getopt()
 *
 * Returns the parsed value. On empty, non-numeric, trailing-garbage,
 * non-positive or too-large input it prints a diagnostic plus the usage
 * text to stderr and exits with status 1.
 */
static unsigned int parse_positive_opt(char *argv[], int opt, const char *text)
{
    char *end;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0' || value < 1 || value > OPT_VALUE_MAX) {
        fprintf(stderr, "%s: invalid value '%s' for -%c\n",
                argv[0], text, opt);
        usage_complete(argv);
        exit(1);
    }
    return (unsigned int)value;
}

/*
 * Parse the command line and set the benchmark parameters.
 *
 * Recognized options: -h (print usage, exit 0), -d <seconds>,
 * -n <threads>, -m (mutex mode), -p (enable the sync profiler) and
 * -r <range> (rounded up to a power of two).
 *
 * Numeric arguments must be positive integers: a zero duration or thread
 * count would lead to a division by zero when reporting, and a zero or
 * wrapped range would make thread_func() index outside the counter array.
 * Unknown options and missing option arguments print the usage text and
 * exit with status 1 instead of being silently ignored.
 */
static void parse_args(int argc, char *argv[])
{
    int c;

    for (;;) {
        c = getopt(argc, argv, "hd:n:mpr:");
        if (c < 0) {
            break;
        }
        switch (c) {
        case 'h':
            usage_complete(argv);
            exit(0);
        case 'd':
            duration = parse_positive_opt(argv, c, optarg);
            break;
        case 'n':
            n_threads = parse_positive_opt(argv, c, optarg);
            break;
        case 'm':
            use_mutex = true;
            break;
        case 'p':
            qsp_enable();
            break;
        case 'r':
            range = pow2ceil(parse_positive_opt(argv, c, optarg));
            break;
        default:
            /* getopt() has already reported the offending option */
            usage_complete(argv);
            exit(1);
        }
    }
}
/*
 * Benchmark entry point: parse the options, report the parameters, start
 * the workers, run the timed measurement and print the results.
 * Returns 0 when the run completes.
 */
int main(int argc, char *argv[])
{
parse_args(argc, argv);
pr_params();
/* workers are created here but spin until run_test() sets test_start */
create_threads();
/* returns only after every worker has been joined */
run_test();
pr_stats();
return 0;
}