Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, fpu: Merge fpu_save_init()
  x86-32, fpu: Rewrite fpu_save_init()
  x86, fpu: Remove PSHUFB_XMM5_* macros
  x86, fpu: Remove unnecessary ifdefs from i387 code.
  x86-32, fpu: Remove math_emulate stub
  x86-64, fpu: Simplify constraints for fxsave/fxtstor
  x86-64, fpu: Fix %cs value in convert_from_fxsr()
  x86-64, fpu: Disable preemption when using TS_USEDFPU
  x86, fpu: Merge __save_init_fpu()
  x86, fpu: Merge tolerant_fwait()
  x86, fpu: Merge fpu_init()
  x86: Use correct type for %cr4
  x86, xsave: Disable xsave in i387 emulation mode

Fixed up fxsaveq-induced conflict in arch/x86/include/asm/i387.h
This commit is contained in:
Linus Torvalds 2010-10-21 13:34:32 -07:00
commit b6f7e38dbb
6 changed files with 85 additions and 202 deletions

View File

@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
extern int restore_i387_xstate_ia32(void __user *buf);
#endif
/*
 * finit_soft_fpu() is declared extern when CONFIG_MATH_EMULATION is set.
 * Otherwise an empty inline stub with the same signature is provided, so a
 * call to it compiles in either configuration and does nothing in the latter.
 */
#ifdef CONFIG_MATH_EMULATION
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
#define X87_FSW_ES (1 << 7) /* Exception Summary */
static __always_inline __pure bool use_xsaveopt(void)
@ -67,6 +73,11 @@ static __always_inline __pure bool use_xsave(void)
return static_cpu_has(X86_FEATURE_XSAVE);
}
/*
 * Returns the result of static_cpu_has(X86_FEATURE_FXSR), i.e. whether the
 * FXSR CPU feature is reported as present.
 */
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has(X86_FEATURE_FXSR);
}
extern void __sanitize_i387_state(struct task_struct *);
static inline void sanitize_i387_state(struct task_struct *tsk)
@ -77,19 +88,11 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
}
#ifdef CONFIG_X86_64
/*
 * Ignore delayed exceptions from user space.
 *
 * Executes fwait with an exception-table entry running from label 1 (the
 * fwait itself) to label 2 (the point immediately after it).
 * NOTE(review): _ASM_EXTABLE is defined elsewhere; presumably a fault raised
 * by the fwait resumes at label 2 instead of being reported - confirm.
 */
static inline void tolerant_fwait(void)
{
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
}
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
int err;
/* See comment in fxsave() below. */
asm volatile("1: rex64/fxrstor (%[fx])\n\t"
"2:\n"
".section .fixup,\"ax\"\n"
@ -98,44 +101,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err)
#if 0 /* See comment in fxsave() below. */
: [fx] "r" (fx), "m" (*fx), "0" (0));
#else
: [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
#endif
: [fx] "R" (fx), "m" (*fx), "0" (0));
return err;
}
/*
 * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending.
 * Clear the x87 state here by setting it to fixed values. The kernel data
 * segment can sometimes be 0 and sometimes the new user value; both should
 * be ok. Use the PDA as a safe address because it should already be in L1.
 *
 * @fpu: FPU context whose saved state (fpu->state) is inspected.
 *
 * Returns early, doing nothing, when use_xsave() is true and the XSTATE_FP
 * bit is clear in the saved xsave header.
 */
static inline void fpu_clear(struct fpu *fpu)
{
struct xsave_struct *xstate = &fpu->state->xsave;
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
/*
* xsave header may indicate the init state of the FP.
*/
if (use_xsave() &&
!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
return;
/* Exception Summary bit set in the saved status word: clear pending x87 exceptions. */
if (unlikely(fx->swd & X87_FSW_ES))
asm volatile("fnclex");
/*
 * NOP padding by default; the emms/fildl sequence is selected by
 * X86_FEATURE_FXSAVE_LEAK. The fildl operand is %gs:0.
 */
alternative_input(ASM_NOP8 ASM_NOP2,
" emms\n" /* clear stack tags */
" fildl %%gs:0", /* load to clear state */
X86_FEATURE_FXSAVE_LEAK);
}
/*
 * Task-level wrapper: applies fpu_clear() to the FPU context embedded in
 * @tsk (tsk->thread.fpu).
 */
static inline void clear_fpu_state(struct task_struct *tsk)
{
fpu_clear(&tsk->thread.fpu);
}
static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{
int err;
@ -149,6 +118,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
if (unlikely(err))
return -EFAULT;
/* See comment in fxsave() below. */
asm volatile("1: rex64/fxsave (%[fx])\n\t"
"2:\n"
".section .fixup,\"ax\"\n"
@ -157,11 +127,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err), "=m" (*fx)
#if 0 /* See comment in fxsave() below. */
: [fx] "r" (fx), "0" (0));
#else
: [fx] "cdaSDb" (fx), "0" (0));
#endif
: [fx] "R" (fx), "0" (0));
if (unlikely(err) &&
__clear_user(fx, sizeof(struct i387_fxsave_struct)))
err = -EFAULT;
@ -175,56 +141,29 @@ static inline void fpu_fxsave(struct fpu *fpu)
uses any extended registers for addressing, a second REX prefix
will be generated (to the assembler, rex64 followed by semicolon
is a separate instruction), and hence the 64-bitness is lost. */
#ifdef CONFIG_AS_FXSAVEQ
/* Using "fxsaveq %0" would be the ideal choice, but is only supported
starting with gas 2.16. */
__asm__ __volatile__("fxsaveq %0"
: "=m" (fpu->state->fxsave));
#elif 0
#else
/* Using, as a workaround, the properly prefixed form below isn't
accepted by any binutils version so far released, complaining that
the same type of prefix is used twice if an extended register is
needed for addressing (fix submitted to mainline 2005-11-21). */
__asm__ __volatile__("rex64/fxsave %0"
: "=m" (fpu->state->fxsave));
#else
/* This, however, we can work around by forcing the compiler to select
needed for addressing (fix submitted to mainline 2005-11-21).
asm volatile("rex64/fxsave %0"
: "=m" (fpu->state->fxsave));
This, however, we can work around by forcing the compiler to select
an addressing mode that doesn't require extended registers. */
__asm__ __volatile__("rex64/fxsave (%1)"
: "=m" (fpu->state->fxsave)
: "cdaSDb" (&fpu->state->fxsave));
asm volatile("rex64/fxsave (%[fx])"
: "=m" (fpu->state->fxsave)
: [fx] "R" (&fpu->state->fxsave));
#endif
}
/*
 * Save the FPU state into @fpu and then clear the live state.
 *
 * Uses fpu_xsave() when use_xsave() is true and fpu_fxsave() otherwise;
 * fpu_clear() is then called unconditionally on the same context.
 */
static inline void fpu_save_init(struct fpu *fpu)
{
if (use_xsave())
fpu_xsave(fpu);
else
fpu_fxsave(fpu);
fpu_clear(fpu);
}
/*
 * Save @tsk's FPU state via fpu_save_init() on tsk->thread.fpu, then clear
 * the TS_USEDFPU flag in the task's thread_info status word.
 */
static inline void __save_init_fpu(struct task_struct *tsk)
{
fpu_save_init(&tsk->thread.fpu);
task_thread_info(tsk)->status &= ~TS_USEDFPU;
}
#else /* CONFIG_X86_32 */
/*
 * finit_soft_fpu() is declared extern when CONFIG_MATH_EMULATION is set.
 * Otherwise an empty inline stub with the same signature stands in for it.
 */
#ifdef CONFIG_MATH_EMULATION
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
/*
 * Variant from the CONFIG_X86_32 branch: issues fnclex (clear pending x87
 * exception flags) followed by fwait, in a single asm statement.
 */
static inline void tolerant_fwait(void)
{
asm volatile("fnclex ; fwait");
}
/* perform fxrstor iff the processor has extended states, otherwise frstor */
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
@ -241,6 +180,14 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
return 0;
}
/*
 * Execute fxsave with fpu->state->fxsave as the memory output operand.
 * This definition sits in the non-64-bit arm of the CONFIG_X86_64 conditional.
 */
static inline void fpu_fxsave(struct fpu *fpu)
{
asm volatile("fxsave %[fx]"
: [fx] "=m" (fpu->state->fxsave));
}
#endif /* CONFIG_X86_64 */
/* We need a safe address that is cheap to find and that is already
in L1 during context switch. The best choices are unfortunately
different for UP and SMP */
@ -256,47 +203,33 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
static inline void fpu_save_init(struct fpu *fpu)
{
if (use_xsave()) {
struct xsave_struct *xstate = &fpu->state->xsave;
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
fpu_xsave(fpu);
/*
* xsave header may indicate the init state of the FP.
*/
if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
goto end;
if (unlikely(fx->swd & X87_FSW_ES))
asm volatile("fnclex");
/*
* we can do a simple return here or be paranoid :)
*/
goto clear_state;
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
return;
} else if (use_fxsr()) {
fpu_fxsave(fpu);
} else {
asm volatile("fsave %[fx]; fwait"
: [fx] "=m" (fpu->state->fsave));
return;
}
/* Use more nops than strictly needed in case the compiler
varies code */
alternative_input(
"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
"fxsave %[fx]\n"
"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
X86_FEATURE_FXSR,
[fx] "m" (fpu->state->fxsave),
[fsw] "m" (fpu->state->fxsave.swd) : "memory");
clear_state:
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
asm volatile("fnclex");
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. safe_address is a random variable that should be in L1 */
alternative_input(
GENERIC_NOP8 GENERIC_NOP2,
ASM_NOP8 ASM_NOP2,
"emms\n\t" /* clear stack tags */
"fildl %[addr]", /* set F?P to defined value */
"fildl %P[addr]", /* set F?P to defined value */
X86_FEATURE_FXSAVE_LEAK,
[addr] "m" (safe_address));
end:
;
}
static inline void __save_init_fpu(struct task_struct *tsk)
@ -305,9 +238,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
task_thread_info(tsk)->status &= ~TS_USEDFPU;
}
#endif /* CONFIG_X86_64 */
static inline int fpu_fxrstor_checking(struct fpu *fpu)
{
return fxrstor_checking(&fpu->state->fxsave);
@ -344,7 +274,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
static inline void __clear_fpu(struct task_struct *tsk)
{
if (task_thread_info(tsk)->status & TS_USEDFPU) {
tolerant_fwait();
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
}
@ -405,19 +338,6 @@ static inline void irq_ts_restore(int TS_state)
stts();
}
#ifdef CONFIG_X86_64
/*
 * Save @tsk's FPU state via __save_init_fpu(), then call stts().
 * Unlike the "These disable preemption on their own" variants that follow,
 * this one contains no preemption handling of its own.
 */
static inline void save_init_fpu(struct task_struct *tsk)
{
__save_init_fpu(tsk);
stts();
}
#define unlazy_fpu __unlazy_fpu
#define clear_fpu __clear_fpu
#else /* CONFIG_X86_32 */
/*
* These disable preemption on their own and are safe
*/
@ -443,8 +363,6 @@ static inline void clear_fpu(struct task_struct *tsk)
preempt_enable();
}
#endif /* CONFIG_X86_64 */
/*
* i387 state interaction
*/
@ -508,7 +426,4 @@ extern void fpu_finit(struct fpu *fpu);
#endif /* __ASSEMBLY__ */
/*
 * Hand-encoded pshufb instructions emitted as raw bytes (66 0f 38 00 /r).
 * ModRM 0xc5 selects xmm5 as source and xmm0 as destination; 0xf5 selects
 * xmm5 as source and xmm6 as destination.
 */
#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
#endif /* _ASM_X86_I387_H */

View File

@ -604,7 +604,7 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4(unsigned long mask)
{
unsigned cr4;
unsigned long cr4;
mmu_cr4_features |= mask;
cr4 = read_cr4();
@ -614,7 +614,7 @@ static inline void set_in_cr4(unsigned long mask)
static inline void clear_in_cr4(unsigned long mask)
{
unsigned cr4;
unsigned long cr4;
mmu_cr4_features &= ~mask;
cr4 = read_cr4();

View File

@ -1269,13 +1269,6 @@ void __cpuinit cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
/*
* Force FPU initialization:
*/
current_thread_info()->status = 0;
clear_used_math();
mxcsr_feature_mask_init();
fpu_init();
xsave_init();
}

View File

@ -68,19 +68,22 @@ static void __cpuinit init_thread_xstate(void)
*/
if (!HAVE_HWFP) {
/*
* Disable xsave as we do not support it if i387
* emulation is enabled.
*/
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
xstate_size = sizeof(struct i387_soft_struct);
return;
}
if (cpu_has_fxsr)
xstate_size = sizeof(struct i387_fxsave_struct);
#ifdef CONFIG_X86_32
else
xstate_size = sizeof(struct i387_fsave_struct);
#endif
}
#ifdef CONFIG_X86_64
/*
* Called at bootup to set up the initial FPU state that is later cloned
* into all processes.
@ -88,12 +91,21 @@ static void __cpuinit init_thread_xstate(void)
void __cpuinit fpu_init(void)
{
unsigned long oldcr0 = read_cr0();
unsigned long cr0;
unsigned long cr4_mask = 0;
set_in_cr4(X86_CR4_OSFXSR);
set_in_cr4(X86_CR4_OSXMMEXCPT);
if (cpu_has_fxsr)
cr4_mask |= X86_CR4_OSFXSR;
if (cpu_has_xmm)
cr4_mask |= X86_CR4_OSXMMEXCPT;
if (cr4_mask)
set_in_cr4(cr4_mask);
write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
if (!HAVE_HWFP)
cr0 |= X86_CR0_EM;
write_cr0(cr0);
if (!smp_processor_id())
init_thread_xstate();
@ -104,24 +116,12 @@ void __cpuinit fpu_init(void)
clear_used_math();
}
#else /* CONFIG_X86_64 */
/*
 * Minimal fpu_init(): calls init_thread_xstate() only on the CPU for which
 * smp_processor_id() returns 0; on every other CPU it does nothing.
 */
void __cpuinit fpu_init(void)
{
if (!smp_processor_id())
init_thread_xstate();
}
#endif /* CONFIG_X86_32 */
void fpu_finit(struct fpu *fpu)
{
#ifdef CONFIG_X86_32
if (!HAVE_HWFP) {
finit_soft_fpu(&fpu->state->soft);
return;
}
#endif
if (cpu_has_fxsr) {
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
@ -386,19 +386,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
#ifdef CONFIG_X86_64
env->fip = fxsave->rip;
env->foo = fxsave->rdp;
/*
* should be actually ds/cs at fpu exception time, but
* that information is not available in 64bit mode.
*/
env->fcs = task_pt_regs(tsk)->cs;
if (tsk == current) {
/*
* should be actually ds/cs at fpu exception time, but
* that information is not available in 64bit mode.
*/
asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
savesegment(ds, env->fos);
} else {
struct pt_regs *regs = task_pt_regs(tsk);
env->fos = 0xffff0000 | tsk->thread.ds;
env->fcs = regs->cs;
env->fos = tsk->thread.ds;
}
env->fos |= 0xffff0000;
#else
env->fip = fxsave->fip;
env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);

View File

@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
/* Must be after DS reload */
unlazy_fpu(prev_p);
__unlazy_fpu(prev_p);
/* Make sure cpu is ready for new context */
if (preload_fpu)

View File

@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void)
}
EXPORT_SYMBOL_GPL(math_state_restore);
#ifndef CONFIG_MATH_EMULATION
/*
 * Fallback math_emulate() compiled when CONFIG_MATH_EMULATION is not set.
 *
 * Logs two KERN_EMERG messages (no emulation and no coprocessor; the name of
 * the task being killed), sends SIGFPE to the current task and then calls
 * schedule(). @info is not used.
 */
void math_emulate(struct math_emu_info *info)
{
printk(KERN_EMERG
"math-emulation not enabled and no coprocessor found.\n");
printk(KERN_EMERG "killing %s.\n", current->comm);
force_sig(SIGFPE, current);
schedule();
}
#endif /* CONFIG_MATH_EMULATION */
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code)
info.regs = regs;
math_emulate(&info);
} else {
math_state_restore(); /* interrupts still off */
conditional_sti(regs);
return;
}
#else
math_state_restore();
#endif
math_state_restore(); /* interrupts still off */
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
}
@ -881,18 +870,6 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
if (cpu_has_fxsr) {
printk(KERN_INFO "Enabling fast FPU save and restore... ");
set_in_cr4(X86_CR4_OSFXSR);
printk("done.\n");
}
if (cpu_has_xmm) {
printk(KERN_INFO
"Enabling unmasked SIMD FPU exception support... ");
set_in_cr4(X86_CR4_OSXMMEXCPT);
printk("done.\n");
}
set_system_trap_gate(SYSCALL_VECTOR, &system_call);
set_bit(SYSCALL_VECTOR, used_vectors);
#endif