rt: Inline a bunch of stack switching code

This commit is contained in:
Brian Anderson 2012-03-21 14:00:37 -07:00
parent d5968d9f38
commit b78af4f7c4
2 changed files with 143 additions and 138 deletions

View File

@ -13,53 +13,6 @@
#include "globals.h"
#include "rust_upcall.h"
// The amount of extra space at the end of each stack segment, available
// to the rt, compiler and dynamic linker for running small functions.
// One RZ_<OS>_<BITS> constant exists per supported target; RED_ZONE_SIZE is
// then set to the constant matching the platform being compiled for.
// FIXME: We want this to be 128 but need to slim the red zone calls down
#define RZ_LINUX_32 (1024*2)
#define RZ_LINUX_64 (1024*2)
#define RZ_MAC_32 (1024*20)
#define RZ_MAC_64 (1024*20)
#define RZ_WIN_32 (1024*20)
// The 64-bit Windows branch below refers to RZ_WIN_64, so it has to be
// defined; it is given the same size as the 32-bit Windows red zone.
#define RZ_WIN_64 (1024*20)
#define RZ_BSD_32 (1024*20)
#define RZ_BSD_64 (1024*20)
#ifdef __linux__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_LINUX_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_LINUX_64
#endif
#endif // __linux__
#ifdef __APPLE__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_MAC_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_MAC_64
#endif
#endif // __APPLE__
#ifdef __WIN32__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_WIN_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_WIN_64
#endif
#endif // __WIN32__
#ifdef __FreeBSD__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_BSD_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_BSD_64
#endif
#endif // __FreeBSD__
// Declared here, defined outside this file. rust_task::record_stack_limit
// calls it with the address stk->data + LIMIT_OFFSET + RED_ZONE_SIZE.
// NOTE(review): presumably stores that address as the stack limit that
// function prologs check against -- confirm against the implementation.
extern "C" CDECL void
record_sp(void *limit);
// Tasks
rust_task::rust_task(rust_task_thread *thread, rust_task_state state,
rust_task *spawner, const char *name,
@ -494,14 +447,6 @@ rust_task::get_next_stack_size(size_t min, size_t current, size_t requested) {
return sz;
}
// Number of bytes in a stack segment that Rust code may use: the distance
// from the start of the segment's data to its end, less the red zone.
static size_t
user_stack_size(stk_seg *stk) {
    const uintptr_t data_start = (uintptr_t)&stk->data[0];
    return (size_t)(stk->end - data_start - RED_ZONE_SIZE);
}
void
rust_task::free_stack(stk_seg *stk) {
LOGPTR(thread, "freeing stk segment", (uintptr_t)stk);
@ -509,37 +454,11 @@ rust_task::free_stack(stk_seg *stk) {
destroy_stack(&local_region, stk);
}
// Argument bundle for new_stack_slow. rust_task::new_stack_fast fills it in
// with a brace initializer ({this, requested_sz}), so member order matters.
struct new_stack_args {
// Task whose new_stack() method new_stack_slow invokes
rust_task *task;
// Size that new_stack_slow forwards to rust_task::new_stack
size_t requested_sz;
};
// Slow path for obtaining a stack segment: unpacks the argument bundle and
// forwards the request to rust_task::new_stack. rust_task::new_stack_fast
// hands this function to call_on_c_stack.
void
new_stack_slow(new_stack_args *args) {
    rust_task *task = args->task;
    const size_t wanted = args->requested_sz;
    task->new_stack(wanted);
}
// NB: This runs on the Rust stack
// This is the new stack fast path, in which we
// reuse the next cached stack segment
void
rust_task::new_stack_fast(size_t requested_sz) {
// The minimum stack size, in bytes, of a Rust stack, excluding red zone
size_t min_sz = thread->min_stack_size;
// Try to reuse an existing stack segment
if (stk != NULL && stk->next != NULL) {
size_t next_sz = user_stack_size(stk->next);
if (min_sz <= next_sz && requested_sz <= next_sz) {
stk = stk->next;
return;
}
}
new_stack_args args = {this, requested_sz};
call_on_c_stack(&args, (void*)new_stack_slow);
}
void
rust_task::new_stack(size_t requested_sz) {
LOG(this, mem, "creating new stack for task %" PRIxPTR, this);
@ -596,63 +515,6 @@ rust_task::new_stack(size_t requested_sz) {
total_stack_sz += user_stack_size(new_stk);
}
// Switches the task to a stack segment with room for stk_sz + args_sz bytes
// (through new_stack_fast), copies the args_sz bytes found at args_addr to
// the top of that segment, records the new stack limit and returns the
// resulting stack pointer (the address the arguments were copied to).
void *
rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
    // Remember which cached segment new_stack_fast may switch to.
    // new_stack_fast reuses stk->next (not stk->prev), so that is the
    // segment to compare against afterwards to detect a reuse.
    stk_seg *maybe_next_stack = NULL;
    if (stk != NULL) {
        maybe_next_stack = stk->next;
    }

    new_stack_fast(stk_sz + args_sz);
    A(thread, stk->end - (uintptr_t)stk->data >= stk_sz + args_sz,
      "Did not receive enough stack");

    uint8_t *new_sp = (uint8_t*)stk->end;
    // Push the function arguments to the new stack
    new_sp = align_down(new_sp - args_sz);

    // When reusing a stack segment we need to tell valgrind that this area of
    // memory is accessible before writing to it, because the act of popping
    // the stack previously made all of the stack inaccessible.
    if (maybe_next_stack == stk) {
        // I don't know exactly where the region ends that valgrind needs us
        // to mark accessible. On x86_64 these extra bytes aren't needed, but
        // on i386 we get errors without.
        int fudge_bytes = 16;
        reuse_valgrind_stack(stk, new_sp - fudge_bytes);
    }

    memcpy(new_sp, args_addr, args_sz);
    record_stack_limit();
    return new_sp;
}
// NB: This runs on the Rust stack
void
rust_task::prev_stack() {
// We're not going to actually delete anything now because that would
// require switching to the C stack and be costly. Instead we'll just move
// up the link list and clean up later, either in new_stack or after our
// turn ends on the scheduler.
stk = stk->prev;
record_stack_limit();
}
// Computes the stack limit for the current segment and passes it to
// record_sp.
void
rust_task::record_stack_limit() {
    I(thread, stk);
    // Function prologs check the amount of stack they need against the end
    // of the stack. For frames smaller than 256 bytes they skip subtracting
    // the frame size and compare %esp to the stack limit directly, so the
    // limit we record has to leave those 256 bytes of slack.
    const unsigned LIMIT_OFFSET = 256;
    A(thread,
      (uintptr_t)stk->end - RED_ZONE_SIZE
      - (uintptr_t)stk->data >= LIMIT_OFFSET,
      "Stack size must be greater than LIMIT_OFFSET");
    void *limit = stk->data + LIMIT_OFFSET + RED_ZONE_SIZE;
    record_sp(limit);
}
void
rust_task::cleanup_after_turn() {
// Delete any spare stack segments that were left

View File

@ -17,6 +17,50 @@
#include "rust_stack.h"
#include "rust_port_selector.h"
// The amount of extra space at the end of each stack segment, available
// to the rt, compiler and dynamic linker for running small functions.
// One RZ_<OS>_<BITS> constant exists per supported target; RED_ZONE_SIZE is
// then set to the constant matching the platform being compiled for.
// FIXME: We want this to be 128 but need to slim the red zone calls down
#define RZ_LINUX_32 (1024*2)
#define RZ_LINUX_64 (1024*2)
#define RZ_MAC_32 (1024*20)
#define RZ_MAC_64 (1024*20)
#define RZ_WIN_32 (1024*20)
// The 64-bit Windows branch below refers to RZ_WIN_64, so it has to be
// defined; it is given the same size as the 32-bit Windows red zone.
#define RZ_WIN_64 (1024*20)
#define RZ_BSD_32 (1024*20)
#define RZ_BSD_64 (1024*20)
#ifdef __linux__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_LINUX_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_LINUX_64
#endif
#endif // __linux__
#ifdef __APPLE__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_MAC_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_MAC_64
#endif
#endif // __APPLE__
#ifdef __WIN32__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_WIN_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_WIN_64
#endif
#endif // __WIN32__
#ifdef __FreeBSD__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_BSD_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_BSD_64
#endif
#endif // __FreeBSD__
struct rust_box;
struct frame_glue_fns {
@ -303,6 +347,105 @@ rust_task::return_c_stack() {
next_c_sp = 0;
}
// NB: This runs on the Rust stack
// Switches the task to a stack segment with room for stk_sz + args_sz bytes
// (through new_stack_fast), copies the args_sz bytes found at args_addr to
// the top of that segment, records the new stack limit and returns the
// resulting stack pointer (the address the arguments were copied to).
inline void *
rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
    // Remember which cached segment new_stack_fast may switch to.
    // new_stack_fast reuses stk->next (not stk->prev), so that is the
    // segment to compare against afterwards to detect a reuse.
    stk_seg *maybe_next_stack = NULL;
    if (stk != NULL) {
        maybe_next_stack = stk->next;
    }

    new_stack_fast(stk_sz + args_sz);
    A(thread, stk->end - (uintptr_t)stk->data >= stk_sz + args_sz,
      "Did not receive enough stack");

    uint8_t *new_sp = (uint8_t*)stk->end;
    // Push the function arguments to the new stack
    new_sp = align_down(new_sp - args_sz);

    // When reusing a stack segment we need to tell valgrind that this area of
    // memory is accessible before writing to it, because the act of popping
    // the stack previously made all of the stack inaccessible.
    if (maybe_next_stack == stk) {
        // I don't know exactly where the region ends that valgrind needs us
        // to mark accessible. On x86_64 these extra bytes aren't needed, but
        // on i386 we get errors without.
        int fudge_bytes = 16;
        reuse_valgrind_stack(stk, new_sp - fudge_bytes);
    }

    memcpy(new_sp, args_addr, args_sz);
    record_stack_limit();
    return new_sp;
}
// Number of bytes in a stack segment that Rust code may use: the distance
// from the start of the segment's data to its end, less the red zone.
inline size_t
user_stack_size(stk_seg *stk) {
    const uintptr_t data_start = (uintptr_t)&stk->data[0];
    return (size_t)(stk->end - data_start - RED_ZONE_SIZE);
}
// Argument bundle for new_stack_slow. rust_task::new_stack_fast fills it in
// with a brace initializer ({this, requested_sz}), so member order matters.
struct new_stack_args {
// Task on whose behalf the new stack is requested
rust_task *task;
// Requested stack size, as passed to rust_task::new_stack_fast
size_t requested_sz;
};
// Slow-path counterpart of rust_task::new_stack_fast, which passes it to
// call_on_c_stack together with a new_stack_args bundle when no cached
// segment can be reused. Only declared here; the definition lives elsewhere.
void
new_stack_slow(new_stack_args *args);
// NB: This runs on the Rust stack
// This is the new stack fast path, in which we
// reuse the next cached stack segment
inline void
rust_task::new_stack_fast(size_t requested_sz) {
// The minimum stack size, in bytes, of a Rust stack, excluding red zone
size_t min_sz = thread->min_stack_size;
// Try to reuse an existing stack segment
if (stk != NULL && stk->next != NULL) {
size_t next_sz = user_stack_size(stk->next);
if (min_sz <= next_sz && requested_sz <= next_sz) {
stk = stk->next;
return;
}
}
new_stack_args args = {this, requested_sz};
call_on_c_stack(&args, (void*)new_stack_slow);
}
// NB: This runs on the Rust stack
inline void
rust_task::prev_stack() {
// We're not going to actually delete anything now because that would
// require switching to the C stack and be costly. Instead we'll just move
// up the link list and clean up later, either in new_stack or after our
// turn ends on the scheduler.
stk = stk->prev;
record_stack_limit();
}
// Declared here, defined outside this file. rust_task::record_stack_limit
// calls it with the address stk->data + LIMIT_OFFSET + RED_ZONE_SIZE.
// NOTE(review): presumably stores that address as the stack limit that
// function prologs check against -- confirm against the implementation.
extern "C" CDECL void
record_sp(void *limit);
// Computes the stack limit for the current segment and passes it to
// record_sp.
inline void
rust_task::record_stack_limit() {
    I(thread, stk);
    // Function prologs check the amount of stack they need against the end
    // of the stack. For frames smaller than 256 bytes they skip subtracting
    // the frame size and compare %esp to the stack limit directly, so the
    // limit we record has to leave those 256 bytes of slack.
    const unsigned LIMIT_OFFSET = 256;
    A(thread,
      (uintptr_t)stk->end - RED_ZONE_SIZE
      - (uintptr_t)stk->data >= LIMIT_OFFSET,
      "Stack size must be greater than LIMIT_OFFSET");
    void *limit = stk->data + LIMIT_OFFSET + RED_ZONE_SIZE;
    record_sp(limit);
}
//
// Local Variables:
// mode: C++