Remove two allocations from spawning a green task

Two unfortunate allocations were wrapping a proc() in a proc() with GreenTask::build_start_wrapper, and then boxing this proc in a ~proc() inside of Context::new(). Both of these allocations were a direct result of two conditions: 1. The Context::new() function has a nice API of taking a procedure argument to start up a new context with. This inherently required an allocation by build_start_wrapper because extra code needed to be run around the edges of a user-provided proc() for a new task. 2. The initial bootstrap code only understood how to pass one argument to the next function. By modifying the assembly and entry points to understand more than one argument, more information is passed through in registers instead of allocating a pointer-sized context. This is sadly where I end up throwing mips under a bus because I have no idea what's going on in the mips context switching code and don't know how to modify it. Closes #7767 cc #11389
2014-02-10 16:13:50 -08:00 · 2014-02-10 16:13:50 -08:00 · 301ff0c2df
parent 21a064d5a3
commit 301ff0c2df
9 changed files with 175 additions and 120 deletions
--- a/mk/crates.mk
+++ b/mk/crates.mk
@ -57,7 +57,7 @@ TOOLS := compiletest rustdoc rustc

 DEPS_std := native:rustrt native:compiler-rt
 DEPS_extra := std term sync serialize getopts collections
-DEPS_green := std
+DEPS_green := std native:context_switch
 DEPS_rustuv := std native:uv native:uv_support
 DEPS_native := std
 DEPS_syntax := std extra term serialize collections
--- a/mk/rt.mk
+++ b/mk/rt.mk
@ -35,7 +35,7 @@
 # that's per-target so you're allowed to conditionally add files based on the
 # target.
 ################################################################################
-NATIVE_LIBS := rustrt sundown uv_support morestack miniz
+NATIVE_LIBS := rustrt sundown uv_support morestack miniz context_switch

 # $(1) is the target triple
 define NATIVE_LIBRARIES
@ -54,9 +54,10 @@ NATIVE_DEPS_rustrt_$(1) := rust_builtin.c \
 			rust_android_dummy.c \
 			rust_test_helpers.c \
 			rust_try.ll \
-			arch/$$(HOST_$(1))/_context.S \
 			arch/$$(HOST_$(1))/record_sp.S
 NATIVE_DEPS_morestack_$(1) := arch/$$(HOST_$(1))/morestack.S
+NATIVE_DEPS_context_switch_$(1) := \
+			arch/$$(HOST_$(1))/_context.S

 ################################################################################
 # You shouldn't find it that necessary to edit anything below this line.
--- a/src/libgreen/context.rs
+++ b/src/libgreen/context.rs
@ -8,12 +8,12 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-use std::libc::c_void;
 use std::uint;
 use std::cast::{transmute, transmute_mut_unsafe,
                transmute_region, transmute_mut_region};
 use stack::Stack;
 use std::unstable::stack;
+use std::unstable::raw;

 // FIXME #7761: Registers is boxed so that it is 16-byte aligned, for storing
 // SSE regs.  It would be marginally better not to do this. In C++ we
@ -22,47 +22,33 @@ use std::unstable::stack;
 // the registers are sometimes empty, but the discriminant would
 // then misalign the regs again.
 pub struct Context {
-    /// The context entry point, saved here for later destruction
-    priv start: Option<~proc()>,
    /// Hold the registers while the task or scheduler is suspended
    priv regs: ~Registers,
    /// Lower bound and upper bound for the stack
    priv stack_bounds: Option<(uint, uint)>,
 }

+pub type InitFn = extern "C" fn(uint, *(), *()) -> !;
+
 impl Context {
    pub fn empty() -> Context {
        Context {
-            start: None,
            regs: new_regs(),
            stack_bounds: None,
        }
    }

    /// Create a new context that will resume execution by running proc()
-    pub fn new(start: proc(), stack: &mut Stack) -> Context {
-        // The C-ABI function that is the task entry point
-        //
-        // Note that this function is a little sketchy. We're taking a
-        // procedure, transmuting it to a stack-closure, and then calling to
-        // closure. This leverages the fact that the representation of these two
-        // types is the same.
-        //
-        // The reason that we're doing this is that this procedure is expected
-        // to never return. The codegen which frees the environment of the
-        // procedure occurs *after* the procedure has completed, and this means
-        // that we'll never actually free the procedure.
-        //
-        // To solve this, we use this transmute (to not trigger the procedure
-        // deallocation here), and then store a copy of the procedure in the
-        // `Context` structure returned. When the `Context` is deallocated, then
-        // the entire procedure box will be deallocated as well.
-        extern fn task_start_wrapper(f: &proc()) {
-            unsafe {
-                let f: &|| = transmute(f);
-                (*f)()
-            }
-        }
+    ///
+    /// The `init` function will be run with `arg` and the `start` procedure
+    /// split up into code and env pointers. It is required that the `init`
+    /// function never return.
+    ///
+    /// FIXME: this is basically an awful interface. The main reason for
+    ///        this is to reduce the number of allocations made when a green
+    ///        task is spawned as much as possible
+    pub fn new(init: InitFn, arg: uint, start: proc(),
+               stack: &mut Stack) -> Context {

        let sp: *uint = stack.end();
        let sp: *mut uint = unsafe { transmute_mut_unsafe(sp) };
@ -74,14 +60,10 @@ impl Context {
                                transmute_region(&*regs));
        };

-        // FIXME #7767: Putting main into a ~ so it's a thin pointer and can
-        // be passed to the spawn function.  Another unfortunate
-        // allocation
-        let start = ~start;
-
        initialize_call_frame(&mut *regs,
-                              task_start_wrapper as *c_void,
-                              unsafe { transmute(&*start) },
+                              init,
+                              arg,
+                              unsafe { transmute(start) },
                              sp);

        // Scheduler tasks don't have a stack in the "we allocated it" sense,
@ -96,7 +78,6 @@ impl Context {
            Some((stack_base as uint, sp as uint))
        };
        return Context {
-            start: Some(start),
            regs: regs,
            stack_bounds: bounds,
        }
@ -138,7 +119,7 @@ impl Context {
    }
 }

-#[link(name = "rustrt", kind = "static")]
+#[link(name = "context_switch", kind = "static")]
 extern {
    fn rust_swap_registers(out_regs: *mut Registers, in_regs: *Registers);
 }
@ -185,13 +166,17 @@ fn new_regs() -> ~Registers {
 }

 #[cfg(target_arch = "x86")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {

+    // x86 has interesting stack alignment requirements, so do some alignment
+    // plus some offsetting to figure out what the actual stack should be.
    let sp = align_down(sp);
    let sp = mut_offset(sp, -4);

-    unsafe { *sp = arg as uint };
+    unsafe { *mut_offset(sp, 2) = procedure.env as uint };
+    unsafe { *mut_offset(sp, 1) = procedure.code as uint };
+    unsafe { *mut_offset(sp, 0) = arg as uint };
    let sp = mut_offset(sp, -1);
    unsafe { *sp = 0 }; // The final return address

@ -215,14 +200,18 @@ fn new_regs() -> ~Registers { ~([0, .. 34]) }
 fn new_regs() -> ~Registers { ~([0, .. 22]) }

 #[cfg(target_arch = "x86_64")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
+    extern { fn rust_bootstrap_green_task(); }

    // Redefinitions from rt/arch/x86_64/regs.h
-    static RUSTRT_ARG0: uint = 3;
    static RUSTRT_RSP: uint = 1;
    static RUSTRT_IP: uint = 8;
    static RUSTRT_RBP: uint = 2;
+    static RUSTRT_R12: uint = 4;
+    static RUSTRT_R13: uint = 5;
+    static RUSTRT_R14: uint = 6;
+    static RUSTRT_R15: uint = 7;

    let sp = align_down(sp);
    let sp = mut_offset(sp, -1);
@ -231,13 +220,23 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
    unsafe { *sp = 0; }

    rtdebug!("creating call frame");
-    rtdebug!("fptr {}", fptr);
-    rtdebug!("arg {}", arg);
+    rtdebug!("fptr {:#x}", fptr as uint);
+    rtdebug!("arg {:#x}", arg);
    rtdebug!("sp {}", sp);

-    regs[RUSTRT_ARG0] = arg as uint;
+    // These registers are frobbed by rust_bootstrap_green_task into the right
+    // location so we can invoke the "real init function", `fptr`.
+    regs[RUSTRT_R12] = arg as uint;
+    regs[RUSTRT_R13] = procedure.code as uint;
+    regs[RUSTRT_R14] = procedure.env as uint;
+    regs[RUSTRT_R15] = fptr as uint;
+
+    // These registers are picked up by the regular context switch paths. These
+    // will put us in "mostly the right context" except for frobbing all the
+    // arguments to the right place. We have the small trampoline code inside of
+    // rust_bootstrap_green_task to do that.
    regs[RUSTRT_RSP] = sp as uint;
-    regs[RUSTRT_IP] = fptr as uint;
+    regs[RUSTRT_IP] = rust_bootstrap_green_task as uint;

    // Last base pointer on the stack should be 0
    regs[RUSTRT_RBP] = 0;
@ -250,8 +249,10 @@ type Registers = [uint, ..32];
 fn new_regs() -> ~Registers { ~([0, .. 32]) }

 #[cfg(target_arch = "arm")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
+    extern { fn rust_bootstrap_green_task(); }
+
    let sp = align_down(sp);
    // sp of arm eabi is 8-byte aligned
    let sp = mut_offset(sp, -2);
@ -259,9 +260,15 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
    // The final return address. 0 indicates the bottom of the stack
    unsafe { *sp = 0; }

-    regs[0] = arg as uint;   // r0
-    regs[13] = sp as uint;   // #53 sp, r13
-    regs[14] = fptr as uint; // #60 pc, r15 --> lr
+    // ARM uses the same technique as x86_64 to have a landing pad for the start
+    // of all new green tasks. Neither r1 nor r2 is saved on a context switch, so
+    // the shim will copy r3/r4 into r1/r2 and then execute the function in r5
+    regs[0] = arg as uint;              // r0
+    regs[3] = procedure.code as uint;   // r3
+    regs[4] = procedure.env as uint;    // r4
+    regs[5] = fptr as uint;             // r5
+    regs[13] = sp as uint;                          // #52 sp, r13
+    regs[14] = rust_bootstrap_green_task as uint;   // #56 pc, r14 --> lr
 }

 #[cfg(target_arch = "mips")]
@ -271,8 +278,8 @@ type Registers = [uint, ..32];
 fn new_regs() -> ~Registers { ~([0, .. 32]) }

 #[cfg(target_arch = "mips")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
    let sp = align_down(sp);
    // sp of mips o32 is 8-byte aligned
    let sp = mut_offset(sp, -2);
--- a/src/libgreen/coroutine.rs
+++ b/src/libgreen/coroutine.rs
@ -11,8 +11,6 @@
 // Coroutines represent nothing more than a context and a stack
 // segment.

-use std::rt::env;
-
 use context::Context;
 use stack::{StackPool, Stack};

@ -31,22 +29,6 @@ pub struct Coroutine {
 }

 impl Coroutine {
-    pub fn new(stack_pool: &mut StackPool,
-               stack_size: Option<uint>,
-               start: proc())
-               -> Coroutine {
-        let stack_size = match stack_size {
-            Some(size) => size,
-            None => env::min_stack()
-        };
-        let mut stack = stack_pool.take_stack(stack_size);
-        let initial_context = Context::new(start, &mut stack);
-        Coroutine {
-            current_stack_segment: stack,
-            saved_context: initial_context
-        }
-    }
-
    pub fn empty() -> Coroutine {
        Coroutine {
            current_stack_segment: unsafe { Stack::dummy_stack() },
--- a/src/libgreen/sched.rs
+++ b/src/libgreen/sched.rs
@ -756,7 +756,7 @@ impl Scheduler {

    /// Called by a running task to end execution, after which it will
    /// be recycled by the scheduler for reuse in a new task.
-    pub fn terminate_current_task(mut ~self, cur: ~GreenTask) {
+    pub fn terminate_current_task(mut ~self, cur: ~GreenTask) -> ! {
        // Similar to deschedule running task and then, but cannot go through
        // the task-blocking path. The task is already dying.
        let stask = self.sched_task.take_unwrap();
--- a/src/libgreen/task.rs
+++ b/src/libgreen/task.rs
@ -19,13 +19,16 @@
 //! values.

 use std::cast;
+use std::rt::env;
 use std::rt::Runtime;
-use std::rt::rtio;
 use std::rt::local::Local;
+use std::rt::rtio;
 use std::rt::task::{Task, BlockedTask, SendMessage};
 use std::task::TaskOpts;
 use std::unstable::mutex::Mutex;
+use std::unstable::raw;

+use context::Context;
 use coroutine::Coroutine;
 use sched::{Scheduler, SchedHandle, RunOnce};
 use stack::StackPool;
@ -75,6 +78,50 @@ pub enum Home {
    HomeSched(SchedHandle),
 }

+/// Trampoline code for all new green tasks which are running around. This
+/// function is passed through to Context::new as the initial rust landing pad
+/// for all green tasks. This code is actually called after the initial context
+/// switch onto a green thread.
+///
+/// The first argument to this function is the `~GreenTask` pointer, and the
+/// next two arguments are the user-provided procedure for running code.
+///
+/// The goal for having this weird-looking function is to reduce the number of
+/// allocations done on a green-task startup as much as possible.
+extern fn bootstrap_green_task(task: uint, code: *(), env: *()) -> ! {
+    // Acquire ownership of the `proc()`
+    let start: proc() = unsafe {
+        cast::transmute(raw::Procedure { code: code, env: env })
+    };
+
+    // Acquire ownership of the `~GreenTask`
+    let mut task: ~GreenTask = unsafe { cast::transmute(task) };
+
+    // First code after swap to this new context. Run our cleanup job
+    task.pool_id = {
+        let sched = task.sched.get_mut_ref();
+        sched.run_cleanup_job();
+        sched.task_state.increment();
+        sched.pool_id
+    };
+
+    // Convert our green task to a libstd task and then execute the code
+    // requested. This is the "try/catch" block for this green task and
+    // is the wrapper for *all* code run in the task.
+    let mut start = Some(start);
+    let task = task.swap().run(|| start.take_unwrap()());
+
+    // Once the function has exited, it's time to run the termination
+    // routine. This means we need to context switch one more time but
+    // clean ourselves up on the other end. Since we have no way of
+    // preserving a handle to the GreenTask down to this point, this
+    // unfortunately must call `GreenTask::convert`. In order to avoid
+    // this we could add a `terminate` function to the `Runtime` trait
+    // in libstd, but that seems less appropriate since the conversion
+    // method exists.
+    GreenTask::convert(task).terminate()
+}
+
 impl GreenTask {
    /// Creates a new green task which is not homed to any particular scheduler
    /// and will not have any contained Task structure.
@ -89,9 +136,20 @@ impl GreenTask {
                     stack_size: Option<uint>,
                     home: Home,
                     start: proc()) -> ~GreenTask {
+        // Allocate ourselves a GreenTask structure
        let mut ops = GreenTask::new_typed(None, TypeGreen(Some(home)));
-        let start = GreenTask::build_start_wrapper(start, ops.as_uint());
-        ops.coroutine = Some(Coroutine::new(stack_pool, stack_size, start));
+
+        // Allocate a stack for us to run on
+        let stack_size = stack_size.unwrap_or_else(|| env::min_stack());
+        let mut stack = stack_pool.take_stack(stack_size);
+        let context = Context::new(bootstrap_green_task, ops.as_uint(), start,
+                                   &mut stack);
+
+        // Package everything up in a coroutine and return
+        ops.coroutine = Some(Coroutine {
+            current_stack_segment: stack,
+            saved_context: context,
+        });
        return ops;
    }

@ -156,46 +214,6 @@ impl GreenTask {
        }
    }

-    /// Builds a function which is the actual starting execution point for a
-    /// rust task. This function is the glue necessary to execute the libstd
-    /// task and then clean up the green thread after it exits.
-    ///
-    /// The second argument to this function is actually a transmuted copy of
-    /// the `GreenTask` pointer. Context switches in the scheduler silently
-    /// transfer ownership of the `GreenTask` to the other end of the context
-    /// switch, so because this is the first code that is running in this task,
-    /// it must first re-acquire ownership of the green task.
-    pub fn build_start_wrapper(start: proc(), ops: uint) -> proc() {
-        proc() {
-            // First code after swap to this new context. Run our
-            // cleanup job after we have re-acquired ownership of the green
-            // task.
-            let mut task: ~GreenTask = unsafe { GreenTask::from_uint(ops) };
-            task.pool_id = {
-                let sched = task.sched.get_mut_ref();
-                sched.run_cleanup_job();
-                sched.task_state.increment();
-                sched.pool_id
-            };
-
-            // Convert our green task to a libstd task and then execute the code
-            // requested. This is the "try/catch" block for this green task and
-            // is the wrapper for *all* code run in the task.
-            let mut start = Some(start);
-            let task = task.swap().run(|| start.take_unwrap()());
-
-            // Once the function has exited, it's time to run the termination
-            // routine. This means we need to context switch one more time but
-            // clean ourselves up on the other end. Since we have no way of
-            // preserving a handle to the GreenTask down to this point, this
-            // unfortunately must call `GreenTask::convert`. In order to avoid
-            // this we could add a `terminate` function to the `Runtime` trait
-            // in libstd, but that seems less appropriate since the coversion
-            // method exists.
-            GreenTask::convert(task).terminate();
-        }
-    }
-
    pub fn give_home(&mut self, new_home: Home) {
        match self.task_type {
            TypeGreen(ref mut home) => { *home = Some(new_home); }
@ -278,9 +296,9 @@ impl GreenTask {
        Local::put(self.swap());
    }

-    fn terminate(mut ~self) {
+    fn terminate(mut ~self) -> ! {
        let sched = self.sched.take_unwrap();
-        sched.terminate_current_task(self);
+        sched.terminate_current_task(self)
    }

    // This function is used to remotely wakeup this green task back on to its
--- a/src/libstd/unstable/raw.rs
+++ b/src/libstd/unstable/raw.rs
@ -41,6 +41,12 @@ pub struct Closure {
    env: *(),
 }

+/// The representation of a Rust procedure (`proc()`)
+pub struct Procedure {
+    code: *(),
+    env: *(),
+}
+
 /// This trait is meant to map equivalences between raw structs and their
 /// corresponding rust values.
 pub trait Repr<T> {
--- a/src/rt/arch/arm/_context.S
+++ b/src/rt/arch/arm/_context.S
@ -51,3 +51,11 @@ rust_swap_registers:
 	msr cpsr_cxsf, r2

 	mov pc, lr
+
+// For the reasons this exists, see the comments in x86_64/_context.S
+.globl rust_bootstrap_green_task
+rust_bootstrap_green_task:
+        mov r0, r0
+        mov r1, r3
+        mov r2, r4
+        mov pc, r5
--- a/src/rt/arch/x86_64/_context.S
+++ b/src/rt/arch/x86_64/_context.S
@ -157,3 +157,36 @@ SWAP_REGISTERS:
        // Jump to the instruction pointer
        // found in regs:
        jmp *(RUSTRT_IP*8)(ARG1)
+
+// This function below, rust_bootstrap_green_task, is used to initialize a green
+// task. This code is the very first code that is run whenever a green task
+// starts. The only assumptions that this code makes are that it has a register
+// context previously set up by Context::new() and some values are in some
+// special registers.
+//
+// In theory the register context could be set up and then the context switching
+// would plop us directly into some 'extern "C" fn', but not all platforms have
+// the argument registers saved throughout a context switch (linux doesn't save
+// rdi/rsi, the first two argument registers). Instead of modifying all context
+// switches, the initial data for starting a green thread is shoved into
+// unrelated registers (r12/13, etc) which always need to be saved on context
+// switches anyway.
+//
+// With this strategy we get the benefit of being able to pass a fair bit of
+// contextual data from the start of a green task to its init function, as well
+// as not hindering any context switches.
+//
+// If you alter this code in any way, you likely need to update
+// src/libgreen/context.rs as well.
+
+#if defined(__APPLE__)
+#define BOOTSTRAP _rust_bootstrap_green_task
+#else
+#define BOOTSTRAP rust_bootstrap_green_task
+#endif
+.globl BOOTSTRAP
+BOOTSTRAP:
+    mov %r12, RUSTRT_ARG0_S
+    mov %r13, RUSTRT_ARG1_S
+    mov %r14, RUSTRT_ARG2_S
+    jmpq *%r15