Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64: move clts into batch cpu state updates when preloading fpu
  x86-64: move unlazy_fpu() into lazy cpu state part of context switch
  x86-32: make sure clts is batched during context switch
  x86: split out core __math_state_restore
This commit is contained in:
Linus Torvalds 2009-09-14 07:58:08 -07:00
commit 625037cc40
4 changed files with 61 additions and 33 deletions

View File

@ -26,6 +26,7 @@ extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void);
extern void __math_state_restore(void);
extern void init_thread_xstate(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

View File

@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
bool preload_fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
/*
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*/
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
__unlazy_fpu(prev_p);
/* we're going to use this soon, after a few expensive things */
if (next_p->fpu_counter > 5)
if (preload_fpu)
prefetch(next->xstate);
/*
@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
/* If we're going to preload the fpu context, make sure clts
is run while we're batching the cpu state updates. */
if (preload_fpu)
clts();
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*
* tsk_used_math() checks prevent calling math_state_restore(),
* which can sleep in the case of !tsk_used_math()
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
if (preload_fpu)
__math_state_restore();
/*
* Restore %gs if needed (which is common)

View File

@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
bool preload_fpu;
/*
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*/
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
/* we're going to use this soon, after a few expensive things */
if (next_p->fpu_counter > 5)
if (preload_fpu)
prefetch(next->xstate);
/*
@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
/* Must be after DS reload */
unlazy_fpu(prev_p);
/* Make sure cpu is ready for new context */
if (preload_fpu)
clts();
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
/* Must be after DS reload */
unlazy_fpu(prev_p);
/*
* Switch the PDA and FPU contexts.
*/
@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*
* tsk_used_math() checks prevent calling math_state_restore(),
* which can sleep in the case of !tsk_used_math()
/*
* Preload the FPU context, now that we've determined that the
* task is likely to be using it.
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
if (preload_fpu)
__math_state_restore();
return prev_p;
}

View File

@ -794,6 +794,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
/*
* __math_state_restore assumes that cr0.TS is already clear and the
* fpu state is all ready for use. Used during context switch.
*/
void __math_state_restore(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
stts();
force_sig(SIGSEGV, tsk);
return;
}
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
}
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
}
clts(); /* Allow maths ops (or we recurse) */
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
stts();
force_sig(SIGSEGV, tsk);
return;
}
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
__math_state_restore();
}
EXPORT_SYMBOL_GPL(math_state_restore);