diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a830cbd7015..dfdbf6403895 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -126,6 +126,7 @@ void foo(void) #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); + OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); BLANK(); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 37b8c1d3e022..514f4d0d2bfa 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -179,7 +179,7 @@ static void lguest_end_context_switch(struct task_struct *next) paravirt_end_context_switch(next); } -/*G:033 +/*G:032 * After that diversion we return to our first native-instruction * replacements: four functions for interrupt control. * @@ -199,41 +199,28 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } -PV_CALLEE_SAVE_REGS_THUNK(save_fl); - -/* restore_flags() just sets the flags back to the value given. */ -static void restore_fl(unsigned long flags) -{ - lguest_data.irq_enabled = flags; - mb(); - /* Null hcall forces interrupt delivery now, if irq_pending is - * set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags - * enables interrupts. */ - if (flags & lguest_data.irq_pending) - kvm_hypercall0(LHCALL_SEND_INTERRUPTS); -} -PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } + +/* Let's pause a moment. Remember how I said these are called so often? + * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to + * break some rules. In particular, these functions are assumed to save their + * own registers if they need to: normal C functions assume they can trash the + * eax register. To use normal C functions, we use + * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the + * C function, then restores it. */ +PV_CALLEE_SAVE_REGS_THUNK(save_fl); PV_CALLEE_SAVE_REGS_THUNK(irq_disable); - -/* Interrupts go on... */ -static void irq_enable(void) -{ - lguest_data.irq_enabled = X86_EFLAGS_IF; - mb(); - /* Null hcall forces interrupt delivery now. */ - if (lguest_data.irq_pending) - kvm_hypercall0(LHCALL_SEND_INTERRUPTS); - -} -PV_CALLEE_SAVE_REGS_THUNK(irq_enable); - /*:*/ + +/* These are in i386_head.S */ +extern void lg_irq_enable(void); +extern void lg_restore_fl(unsigned long flags); + /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, * since interrupts are rare and we'll just get the interrupt on the next timer @@ -1041,9 +1028,9 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); - pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); - pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 3e0c5545d59c..a9c8cfe61cd4 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -47,7 +47,63 @@ ENTRY(lguest_entry) LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) -/*:*/ + +/*G:033 But using those wrappers is inefficient (we'll see why that doesn't + * matter for save_fl and irq_disable later). If we write our routines + * carefully in assembler, we can avoid clobbering any registers and avoid + * jumping through the wrapper functions. + * + * I skipped over our first piece of assembler, but this one is worth studying + * in a bit more detail so I'll describe in easy stages. First, the routine + * to enable interrupts: */ +ENTRY(lg_irq_enable) + /* The reverse of irq_disable, this sets lguest_data.irq_enabled to + * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ + movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled + /* But now we need to check if the Host wants to know: there might have + * been interrupts waiting to be delivered, in which case it will have + * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we + * jump to send_interrupts, otherwise we're done. */ + testl $0, lguest_data+LGUEST_DATA_irq_pending + jnz send_interrupts + /* One cool thing about x86 is that you can do many things without using + * a register. In this case, the normal path hasn't needed to save or + * restore any registers at all! */ + ret +send_interrupts: + /* OK, now we need a register: eax is used for the hypercall number, + * which is LHCALL_SEND_INTERRUPTS. + * + * We used not to bother with this pending detection at all, which was + * much simpler. Sooner or later the Host would realize it had to + * send us an interrupt. But that turns out to make performance 7 + * times worse on a simple tcp benchmark. So now we do this the hard + * way. */ + pushl %eax + movl $LHCALL_SEND_INTERRUPTS, %eax + /* This is a vmcall instruction (same thing that KVM uses). Older + * assembler versions might not know the "vmcall" instruction, so we + * create one manually here. */ + .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + popl %eax + ret + +/* Finally, the "popf" or "restore flags" routine. The %eax register holds the + * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're + * enabling interrupts again, if it's 0 we're leaving them off. */ +ENTRY(lg_restore_fl) + /* This is just "lguest_data.irq_enabled = flags;" */ + movl %eax, lguest_data+LGUEST_DATA_irq_enabled + /* Now, if the %eax value has enabled interrupts and + * lguest_data.irq_pending is set, we want to tell the Host so it can + * deliver any outstanding interrupts. Fortunately, both values will + * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" + * instruction will AND them together for us. If both are set, we + * jump to send_interrupts. */ + testl lguest_data+LGUEST_DATA_irq_pending, %eax + jnz send_interrupts + /* Again, the normal path has used no extra registers. Clever, huh? */ + ret /* These demark the EIP range where host should never deliver interrupts. */ .global lguest_noirq_start