d1d1b206b0
ELF thread local storage is about 10% faster on tests/test-coroutine's perf/cost test. The timing on my machine is 190ns per iteration with pthread TLS, 170 with ELF TLS. Based on a patch by Kevin Wolf and Peter Lieven, but redone to follow the model of coroutine-win32.c (including the important "noinline" attribute!). Platforms without thread-local storage (OpenBSD probably?) will need a new-enough GCC for this to compile, in order to use the same emutls support that Windows already relies on. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Message-id: 1417518350-6167-2-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
195 lines
5.1 KiB
C
195 lines
5.1 KiB
C
/*
 * ucontext coroutine initialization code
 *
 * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
 * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.0 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
|
|
#ifdef _FORTIFY_SOURCE
|
|
#undef _FORTIFY_SOURCE
|
|
#endif
|
|
#include <stdlib.h>
|
|
#include <setjmp.h>
|
|
#include <stdint.h>
|
|
#include <ucontext.h>
|
|
#include "qemu-common.h"
|
|
#include "block/coroutine_int.h"
|
|
|
|
#ifdef CONFIG_VALGRIND_H
|
|
#include <valgrind/valgrind.h>
|
|
#endif
|
|
|
|
typedef struct {
|
|
Coroutine base;
|
|
void *stack;
|
|
sigjmp_buf env;
|
|
|
|
#ifdef CONFIG_VALGRIND_H
|
|
unsigned int valgrind_stack_id;
|
|
#endif
|
|
|
|
} CoroutineUContext;
|
|
|
|
/**
|
|
* Per-thread coroutine bookkeeping
|
|
*/
|
|
static __thread CoroutineUContext leader;
|
|
static __thread Coroutine *current;
|
|
|
|
/*
 * The variadic arguments of makecontext() must be of type 'int', so
 * passing the pointer we need may take several int arguments.  This
 * union is a quick hack for that: the pointer is stored through 'p' and
 * handed over as the two ints in 'i', then reassembled the same way on
 * the receiving side.
 */
union cc_arg {
    void *p;
    int i[2];
};
|
|
|
|
static void coroutine_trampoline(int i0, int i1)
|
|
{
|
|
union cc_arg arg;
|
|
CoroutineUContext *self;
|
|
Coroutine *co;
|
|
|
|
arg.i[0] = i0;
|
|
arg.i[1] = i1;
|
|
self = arg.p;
|
|
co = &self->base;
|
|
|
|
/* Initialize longjmp environment and switch back the caller */
|
|
if (!sigsetjmp(self->env, 0)) {
|
|
siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
|
|
}
|
|
|
|
while (true) {
|
|
co->entry(co->entry_arg);
|
|
qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
|
|
}
|
|
}
|
|
|
|
Coroutine *qemu_coroutine_new(void)
|
|
{
|
|
const size_t stack_size = 1 << 20;
|
|
CoroutineUContext *co;
|
|
ucontext_t old_uc, uc;
|
|
sigjmp_buf old_env;
|
|
union cc_arg arg = {0};
|
|
|
|
/* The ucontext functions preserve signal masks which incurs a
|
|
* system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
|
|
* preserve signal masks but only works on the current stack.
|
|
* Since we need a way to create and switch to a new stack, use
|
|
* the ucontext functions for that but sigsetjmp()/siglongjmp() for
|
|
* everything else.
|
|
*/
|
|
|
|
if (getcontext(&uc) == -1) {
|
|
abort();
|
|
}
|
|
|
|
co = g_malloc0(sizeof(*co));
|
|
co->stack = g_malloc(stack_size);
|
|
co->base.entry_arg = &old_env; /* stash away our jmp_buf */
|
|
|
|
uc.uc_link = &old_uc;
|
|
uc.uc_stack.ss_sp = co->stack;
|
|
uc.uc_stack.ss_size = stack_size;
|
|
uc.uc_stack.ss_flags = 0;
|
|
|
|
#ifdef CONFIG_VALGRIND_H
|
|
co->valgrind_stack_id =
|
|
VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
|
|
#endif
|
|
|
|
arg.p = co;
|
|
|
|
makecontext(&uc, (void (*)(void))coroutine_trampoline,
|
|
2, arg.i[0], arg.i[1]);
|
|
|
|
/* swapcontext() in, siglongjmp() back out */
|
|
if (!sigsetjmp(old_env, 0)) {
|
|
swapcontext(&old_uc, &uc);
|
|
}
|
|
return &co->base;
|
|
}
|
|
|
|
#if defined(CONFIG_VALGRIND_H)
#if defined(CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE)
/* Work around an unused variable in the valgrind.h macro... */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
/*
 * Deregister the stack that was registered with Valgrind for this
 * coroutine, using the id saved in valgrind_stack_id.
 *
 * NOTE(review): presumably kept as a separate function so that the
 * diagnostic push/pop above and below covers only this macro expansion.
 */
static inline void valgrind_stack_deregister(CoroutineUContext *co)
{
    VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
}
#if defined(CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE)
#pragma GCC diagnostic pop
#endif
#endif /* CONFIG_VALGRIND_H */
|
|
|
|
/*
 * Release a coroutine: drop its Valgrind stack registration (only when
 * built with CONFIG_VALGRIND_H), then free its stack and finally the
 * coroutine object itself.
 *
 * co_ must point at the 'base' member of a CoroutineUContext.
 */
void qemu_coroutine_delete(Coroutine *co_)
{
    CoroutineUContext *ctx = DO_UPCAST(CoroutineUContext, base, co_);

#ifdef CONFIG_VALGRIND_H
    valgrind_stack_deregister(ctx);
#endif

    /* The stack must go first: its pointer lives inside ctx. */
    g_free(ctx->stack);
    g_free(ctx);
}
|
|
|
|
/* This function is marked noinline to prevent GCC from inlining it
|
|
* into coroutine_trampoline(). If we allow it to do that then it
|
|
* hoists the code to get the address of the TLS variable "current"
|
|
* out of the while() loop. This is an invalid transformation because
|
|
* the sigsetjmp() call may be called when running thread A but
|
|
* return in thread B, and so we might be in a different thread
|
|
* context each time round the loop.
|
|
*/
|
|
CoroutineAction __attribute__((noinline))
|
|
qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
|
|
CoroutineAction action)
|
|
{
|
|
CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
|
|
CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
|
|
int ret;
|
|
|
|
current = to_;
|
|
|
|
ret = sigsetjmp(from->env, 0);
|
|
if (ret == 0) {
|
|
siglongjmp(to->env, action);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
Coroutine *qemu_coroutine_self(void)
|
|
{
|
|
if (!current) {
|
|
current = &leader.base;
|
|
}
|
|
return current;
|
|
}
|
|
|
|
bool qemu_in_coroutine(void)
|
|
{
|
|
return current && current->caller;
|
|
}
|