x86: move fpu_counter into ARCH specific thread_struct

Only a couple of arches (sh/x86) use fpu_counter in task_struct so it can
be moved out into ARCH specific thread_struct, reducing the size of
task_struct for other arches.

Compile tested i386_defconfig + gcc 4.7.3

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mundt <paul.mundt@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Vineet Gupta 2013-11-12 15:08:46 -08:00 committed by Linus Torvalds
parent 616c05d110
commit c375f15a43
6 changed files with 19 additions and 10 deletions

View File

@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk)
* Forget coprocessor state..
*/
preempt_disable();
tsk->fpu_counter = 0;
tsk->thread.fpu_counter = 0;
__drop_fpu(tsk);
clear_used_math();
preempt_enable();
@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
* or if the past 5 consecutive context-switches used math.
*/
fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
new->fpu_counter > 5);
new->thread.fpu_counter > 5);
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
new->fpu_counter++;
new->thread.fpu_counter++;
__thread_set_has_fpu(new);
prefetch(new->thread.fpu.state);
} else if (!use_eager_fpu())
stts();
} else {
old->fpu_counter = 0;
old->thread.fpu_counter = 0;
old->thread.fpu.last_cpu = ~0;
if (fpu.preload) {
new->fpu_counter++;
new->thread.fpu_counter++;
if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else

View File

@ -488,6 +488,15 @@ struct thread_struct {
unsigned long iopl;
/* Max allowed port in the bitmap, in bytes: */
unsigned io_bitmap_max;
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
* saving becomes unlazy to save the trap. This is an unsigned char
* so that after 256 times the counter wraps and the behavior turns
* lazy again; this to deal with bursty apps that only use FPU for
* a short time
*/
unsigned char fpu_counter;
};
/*

View File

@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk)
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
} else
tsk->fpu_counter = 0;
tsk->thread.fpu_counter = 0;
preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);

View File

@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
p->fpu_counter = 0;
p->thread.fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(current_pt_regs());
p->fpu_counter = 0;
p->thread.fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
tsk = current;
err = -ENOMEM;

View File

@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp = (unsigned long) childregs;
p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
p->fpu_counter = 0;
p->thread.fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);

View File

@ -653,7 +653,7 @@ void math_state_restore(void)
return;
}
tsk->fpu_counter++;
tsk->thread.fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);