rt: Inline a bunch of stack switching code

parent d5968d9f38
commit b78af4f7c4
src/rt/rust_task.cpp
@@ -13,53 +13,6 @@
 #include "globals.h"
 #include "rust_upcall.h"
 
-// The amount of extra space at the end of each stack segment, available
-// to the rt, compiler and dynamic linker for running small functions
-// FIXME: We want this to be 128 but need to slim the red zone calls down
-#define RZ_LINUX_32 (1024*2)
-#define RZ_LINUX_64 (1024*2)
-#define RZ_MAC_32 (1024*20)
-#define RZ_MAC_64 (1024*20)
-#define RZ_WIN_32 (1024*20)
-#define RZ_BSD_32 (1024*20)
-#define RZ_BSD_64 (1024*20)
-
-#ifdef __linux__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_LINUX_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_LINUX_64
-#endif
-#endif
-#ifdef __APPLE__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_MAC_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_MAC_64
-#endif
-#endif
-#ifdef __WIN32__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_WIN_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_WIN_64
-#endif
-#endif
-#ifdef __FreeBSD__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_BSD_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_BSD_64
-#endif
-#endif
-
-extern "C" CDECL void
-record_sp(void *limit);
-
 // Tasks
 rust_task::rust_task(rust_task_thread *thread, rust_task_state state,
                      rust_task *spawner, const char *name,
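record_sp, removed from this file here and re-declared in the header below, is how the runtime publishes the current segment's stack limit to compiled code; its definition is assembly elsewhere in the rt. A minimal sketch of the protocol it supports, assuming a thread-local slot stands in for wherever the real runtime keeps the limit:

#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for the real limit storage; the actual record_sp
// is platform-specific assembly.
static thread_local uintptr_t stack_limit = 0;

extern "C" void record_sp_sketch(void *limit) {
    stack_limit = (uintptr_t)limit;  // lowest address Rust frames may reach
}

// What a compiled function prologue effectively checks before running:
static bool needs_new_segment(void *sp, size_t frame_sz) {
    return (uintptr_t)sp - frame_sz < stack_limit;
}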
@@ -494,14 +447,6 @@ rust_task::get_next_stack_size(size_t min, size_t current, size_t requested) {
     return sz;
 }
 
-// The amount of stack in a segment available to Rust code
-static size_t
-user_stack_size(stk_seg *stk) {
-    return (size_t)(stk->end
-                    - (uintptr_t)&stk->data[0]
-                    - RED_ZONE_SIZE);
-}
-
 void
 rust_task::free_stack(stk_seg *stk) {
     LOGPTR(thread, "freeing stk segment", (uintptr_t)stk);
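user_stack_size, moving to the header below, measures a segment from the start of data to end and then holds back the red zone. A sketch of the stk_seg layout this arithmetic implies (field names beyond end, data, prev and next are assumptions; the real struct lives in rust_stack.h):

#include <cstddef>
#include <cstdint>

struct stk_seg_sketch {
    stk_seg_sketch *prev;  // shallower segment, restored by prev_stack()
    stk_seg_sketch *next;  // cached deeper segment, reused by new_stack_fast()
    uintptr_t end;         // one past the last byte of the segment
    uint8_t data[1];       // stack memory; frames grow down from end
};

// Usable bytes = whole segment minus the trailing red zone reserved for
// the rt, compiler and dynamic linker.
static size_t user_stack_size_sketch(stk_seg_sketch *stk, size_t red_zone) {
    return (size_t)(stk->end - (uintptr_t)&stk->data[0] - red_zone);
}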
@@ -509,37 +454,11 @@ rust_task::free_stack(stk_seg *stk) {
     destroy_stack(&local_region, stk);
 }
 
-struct new_stack_args {
-    rust_task *task;
-    size_t requested_sz;
-};
-
 void
 new_stack_slow(new_stack_args *args) {
     args->task->new_stack(args->requested_sz);
 }
 
-// NB: This runs on the Rust stack
-// This is the new stack fast path, in which we
-// reuse the next cached stack segment
-void
-rust_task::new_stack_fast(size_t requested_sz) {
-    // The minimum stack size, in bytes, of a Rust stack, excluding red zone
-    size_t min_sz = thread->min_stack_size;
-
-    // Try to reuse an existing stack segment
-    if (stk != NULL && stk->next != NULL) {
-        size_t next_sz = user_stack_size(stk->next);
-        if (min_sz <= next_sz && requested_sz <= next_sz) {
-            stk = stk->next;
-            return;
-        }
-    }
-
-    new_stack_args args = {this, requested_sz};
-    call_on_c_stack(&args, (void*)new_stack_slow);
-}
-
 void
 rust_task::new_stack(size_t requested_sz) {
     LOG(this, mem, "creating new stack for task %" PRIxPTR, this);
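new_stack_fast handles segment reuse without leaving the Rust stack and only trampolines to the C stack for a real allocation; because call_on_c_stack carries a single pointer, the arguments travel in a struct. The same shape in miniature (run_on_c_stack_sketch is a hypothetical stand-in for call_on_c_stack):

#include <cstddef>

struct grow_args_sketch {
    int *counter;
    size_t amount;
};

// Slow path: the expensive work, only ever entered via the trampoline.
static void grow_slow_sketch(grow_args_sketch *args) {
    *args->counter += (int)args->amount;
}

// Same signature shape as call_on_c_stack: one opaque argument pointer,
// one untyped function pointer.
static void run_on_c_stack_sketch(void *a, void *fn) {
    ((void (*)(grow_args_sketch *))fn)((grow_args_sketch *)a);
}

static void grow_sketch(int *counter, size_t amount) {
    if (amount == 0)
        return;                                 // fast path: stay put
    grow_args_sketch args = {counter, amount};  // bundle for the one pointer
    run_on_c_stack_sketch(&args, (void *)grow_slow_sketch);
}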
@@ -596,63 +515,6 @@ rust_task::new_stack(size_t requested_sz) {
     total_stack_sz += user_stack_size(new_stk);
 }
 
-void *
-rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
-    stk_seg *maybe_next_stack = NULL;
-    if (stk != NULL) {
-        maybe_next_stack = stk->next;
-    }
-
-    new_stack_fast(stk_sz + args_sz);
-    A(thread, stk->end - (uintptr_t)stk->data >= stk_sz + args_sz,
-      "Did not receive enough stack");
-    uint8_t *new_sp = (uint8_t*)stk->end;
-    // Push the function arguments to the new stack
-    new_sp = align_down(new_sp - args_sz);
-
-    // When reusing a stack segment we need to tell valgrind that this area of
-    // memory is accessible before writing to it, because the act of popping
-    // the stack previously made all of the stack inaccessible.
-    if (maybe_next_stack == stk) {
-        // I don't know exactly where the region ends that valgrind needs us
-        // to mark accessible. On x86_64 these extra bytes aren't needed, but
-        // on i386 we get errors without.
-        int fudge_bytes = 16;
-        reuse_valgrind_stack(stk, new_sp - fudge_bytes);
-    }
-
-    memcpy(new_sp, args_addr, args_sz);
-    record_stack_limit();
-    return new_sp;
-}
-
-// NB: This runs on the Rust stack
-void
-rust_task::prev_stack() {
-    // We're not going to actually delete anything now because that would
-    // require switching to the C stack and be costly. Instead we'll just move
-    // up the linked list and clean up later, either in new_stack or after our
-    // turn ends on the scheduler.
-    stk = stk->prev;
-    record_stack_limit();
-}
-
-void
-rust_task::record_stack_limit() {
-    I(thread, stk);
-    // The function prolog compares the amount of stack needed to the end of
-    // the stack. As an optimization, when the frame size is less than 256
-    // bytes, it will simply compare %esp to the stack limit instead of
-    // subtracting the frame size. As a result we need our stack limit to
-    // account for those 256 bytes.
-    const unsigned LIMIT_OFFSET = 256;
-    A(thread,
-      (uintptr_t)stk->end - RED_ZONE_SIZE
-      - (uintptr_t)stk->data >= LIMIT_OFFSET,
-      "Stack size must be greater than LIMIT_OFFSET");
-    record_sp(stk->data + LIMIT_OFFSET + RED_ZONE_SIZE);
-}
-
 void
 rust_task::cleanup_after_turn() {
     // Delete any spare stack segments that were left
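align_down is used by next_stack but defined elsewhere in the rt; a plausible sketch, assuming it rounds the stack pointer down to a 16-byte boundary as the common x86 ABIs require:

#include <cstdint>

// Hypothetical reconstruction: clear the low bits to round down to the
// assumed 16-byte stack alignment.
static uint8_t *align_down_sketch(uint8_t *sp) {
    return (uint8_t *)((uintptr_t)sp & ~(uintptr_t)15);
}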
src/rt/rust_task.h
@@ -17,6 +17,50 @@
 #include "rust_stack.h"
 #include "rust_port_selector.h"
 
+// The amount of extra space at the end of each stack segment, available
+// to the rt, compiler and dynamic linker for running small functions
+// FIXME: We want this to be 128 but need to slim the red zone calls down
+#define RZ_LINUX_32 (1024*2)
+#define RZ_LINUX_64 (1024*2)
+#define RZ_MAC_32 (1024*20)
+#define RZ_MAC_64 (1024*20)
+#define RZ_WIN_32 (1024*20)
+#define RZ_BSD_32 (1024*20)
+#define RZ_BSD_64 (1024*20)
+
+#ifdef __linux__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_LINUX_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_LINUX_64
+#endif
+#endif
+#ifdef __APPLE__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_MAC_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_MAC_64
+#endif
+#endif
+#ifdef __WIN32__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_WIN_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_WIN_64
+#endif
+#endif
+#ifdef __FreeBSD__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_BSD_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_BSD_64
+#endif
+#endif
+
 struct rust_box;
 
 struct frame_glue_fns {
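Keeping these definitions in the header is what the commit title is about: a C++ member function can only be inlined at a call site if its body is visible in that translation unit, so hot paths like new_stack_fast must live here rather than in the .cpp file. The rule in miniature (hypothetical names):

// widget.h -- sketch
struct widget {
    int n;
    void slow_bump();                    // body in widget.cpp: every call
                                         // from another TU is a real call
    inline void fast_bump() { n += 1; }  // body visible here: callers in
                                         // any TU can inline it
};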
@@ -303,6 +347,105 @@ rust_task::return_c_stack() {
     next_c_sp = 0;
 }
 
+// NB: This runs on the Rust stack
+inline void *
+rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
+    stk_seg *maybe_next_stack = NULL;
+    if (stk != NULL) {
+        maybe_next_stack = stk->next;
+    }
+
+    new_stack_fast(stk_sz + args_sz);
+    A(thread, stk->end - (uintptr_t)stk->data >= stk_sz + args_sz,
+      "Did not receive enough stack");
+    uint8_t *new_sp = (uint8_t*)stk->end;
+    // Push the function arguments to the new stack
+    new_sp = align_down(new_sp - args_sz);
+
+    // When reusing a stack segment we need to tell valgrind that this area of
+    // memory is accessible before writing to it, because the act of popping
+    // the stack previously made all of the stack inaccessible.
+    if (maybe_next_stack == stk) {
+        // I don't know exactly where the region ends that valgrind needs us
+        // to mark accessible. On x86_64 these extra bytes aren't needed, but
+        // on i386 we get errors without.
+        int fudge_bytes = 16;
+        reuse_valgrind_stack(stk, new_sp - fudge_bytes);
+    }
+
+    memcpy(new_sp, args_addr, args_sz);
+    record_stack_limit();
+    return new_sp;
+}
+
+// The amount of stack in a segment available to Rust code
+inline size_t
+user_stack_size(stk_seg *stk) {
+    return (size_t)(stk->end
+                    - (uintptr_t)&stk->data[0]
+                    - RED_ZONE_SIZE);
+}
+
+struct new_stack_args {
+    rust_task *task;
+    size_t requested_sz;
+};
+
+void
+new_stack_slow(new_stack_args *args);
+
+// NB: This runs on the Rust stack
+// This is the new stack fast path, in which we
+// reuse the next cached stack segment
+inline void
+rust_task::new_stack_fast(size_t requested_sz) {
+    // The minimum stack size, in bytes, of a Rust stack, excluding red zone
+    size_t min_sz = thread->min_stack_size;
+
+    // Try to reuse an existing stack segment
+    if (stk != NULL && stk->next != NULL) {
+        size_t next_sz = user_stack_size(stk->next);
+        if (min_sz <= next_sz && requested_sz <= next_sz) {
+            stk = stk->next;
+            return;
+        }
+    }
+
+    new_stack_args args = {this, requested_sz};
+    call_on_c_stack(&args, (void*)new_stack_slow);
+}
+
+// NB: This runs on the Rust stack
+inline void
+rust_task::prev_stack() {
+    // We're not going to actually delete anything now because that would
+    // require switching to the C stack and be costly. Instead we'll just move
+    // up the linked list and clean up later, either in new_stack or after our
+    // turn ends on the scheduler.
+    stk = stk->prev;
+    record_stack_limit();
+}
+
+extern "C" CDECL void
+record_sp(void *limit);
+
+inline void
+rust_task::record_stack_limit() {
+    I(thread, stk);
+    // The function prolog compares the amount of stack needed to the end of
+    // the stack. As an optimization, when the frame size is less than 256
+    // bytes, it will simply compare %esp to the stack limit instead of
+    // subtracting the frame size. As a result we need our stack limit to
+    // account for those 256 bytes.
+    const unsigned LIMIT_OFFSET = 256;
+    A(thread,
+      (uintptr_t)stk->end - RED_ZONE_SIZE
+      - (uintptr_t)stk->data >= LIMIT_OFFSET,
+      "Stack size must be greater than LIMIT_OFFSET");
+    record_sp(stk->data + LIMIT_OFFSET + RED_ZONE_SIZE);
+}
+
+
 //
 // Local Variables:
 // mode: C++
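Concretely, on 32-bit Linux the recorded limit sits LIMIT_OFFSET + RZ_LINUX_32 = 256 + 2048 = 2304 bytes above the start of data. A frame under 256 bytes whose prologue only compares %esp against the limit can still push %esp as much as 256 bytes below it and remain clear of the red zone. A worked check, assuming a hypothetical segment base:

#include <cassert>
#include <cstdint>

int main() {
    const uintptr_t data = 0x1000;            // hypothetical &stk->data[0]
    const unsigned LIMIT_OFFSET = 256;        // slack for small-frame prologues
    const unsigned RED_ZONE_SIZE = 1024 * 2;  // RZ_LINUX_32

    uintptr_t limit = data + LIMIT_OFFSET + RED_ZONE_SIZE;  // data + 2304

    // Worst case for a small frame: %esp ends up LIMIT_OFFSET below the
    // limit, which is exactly the top of the red zone -- still safe.
    assert(limit - LIMIT_OFFSET >= data + RED_ZONE_SIZE);
    return 0;
}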