diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 1e45ff292bc9..3c73dc865ead 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -245,6 +245,9 @@ syscall_exit: restore_all: movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: OLDSS(%esp) contains wrong/random values if we + # are returning to the kernel. + # See comments in process.c:copy_thread() for details. movb OLDSS(%esp), %ah movb CS(%esp), %al andl $(VM_MASK | (4 << 8) | 3), %eax diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c36fedf40e95..36145efc61b5 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -405,7 +405,17 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, childregs->esp = esp; p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); + /* + * The below -8 is to reserve 8 bytes on top of the ring0 stack. + * This is necessary to guarantee that the entire "struct pt_regs" + * is accessible even if the CPU hasn't stored the SS/ESP registers + * on the stack (interrupt gate does not save these registers + * when switching to the same priv ring). + * Therefore beware: accessing the xss/esp fields of the + * "struct pt_regs" is possible, but they may contain + * completely wrong values. + */ + p->thread.esp0 = (unsigned long) (childregs+1) - 8; p->thread.eip = (unsigned long) ret_from_fork;