linux/arch/sh/math-emu/math.c
Stuart Menefy d3ea9fa0a5 sh: Minor optimisations to FPU handling
A number of small optimisations to FPU handling, in particular:

 - move the task USEDFPU flag from the thread_info flags field (which
   is accessed asynchronously to the thread) to a new status field,
   which is only accessed by the thread itself. This allows locking to
   be removed in most cases, or can be reduced to a preempt_lock().
   This mimics the i386 behaviour.

 - move the modification of regs->sr and thread_info->status flags out
   of save_fpu() to __unlazy_fpu(). This gives the compiler a better
   chance to optimise things, as well as making save_fpu() symmetrical
   with restore_fpu() and init_fpu().

 - implement prepare_to_copy(), so that when creating a thread, we can
   unlazy the FPU prior to copying the thread data structures.

Also make sure that the FPU is disabled while in the kernel, in
particular while booting, and for newly created kernel threads,

In a very artificial benchmark, the execution time for 2500000
context switches was reduced from 50 to 45 seconds.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
2009-11-24 17:45:38 +09:00

630 lines
13 KiB
C

/*
* arch/sh/math-emu/math.c
*
* Copyright (C) 2006 Takashi YOSHII <takasi-y@ops.dti.ne.jp>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/io.h>
#include "sfp-util.h"
#include <math-emu/soft-fp.h>
#include <math-emu/single.h>
#include <math-emu/double.h>
#define FPUL (fregs->fpul)
#define FPSCR (fregs->fpscr)
#define FPSCR_RM (FPSCR&3)
#define FPSCR_DN ((FPSCR>>18)&1)
#define FPSCR_PR ((FPSCR>>19)&1)
#define FPSCR_SZ ((FPSCR>>20)&1)
#define FPSCR_FR ((FPSCR>>21)&1)
#define FPSCR_MASK 0x003fffffUL
#define BANK(n) (n^(FPSCR_FR?16:0))
#define FR ((unsigned long*)(fregs->fp_regs))
#define FR0 (FR[BANK(0)])
#define FRn (FR[BANK(n)])
#define FRm (FR[BANK(m)])
#define DR ((unsigned long long*)(fregs->fp_regs))
#define DRn (DR[BANK(n)/2])
#define DRm (DR[BANK(m)/2])
#define XREG(n) (n^16)
#define XFn (FR[BANK(XREG(n))])
#define XFm (FR[BANK(XREG(m))])
#define XDn (DR[BANK(XREG(n))/2])
#define XDm (DR[BANK(XREG(m))/2])
#define R0 (regs->regs[0])
#define Rn (regs->regs[n])
#define Rm (regs->regs[m])
#define WRITE(d,a) ({if(put_user(d, (typeof (d)*)a)) return -EFAULT;})
#define READ(d,a) ({if(get_user(d, (typeof (d)*)a)) return -EFAULT;})
#define PACK_S(r,f) FP_PACK_SP(&r,f)
#define UNPACK_S(f,r) FP_UNPACK_SP(f,&r)
#define PACK_D(r,f) \
{u32 t[2]; FP_PACK_DP(t,f); ((u32*)&r)[0]=t[1]; ((u32*)&r)[1]=t[0];}
#define UNPACK_D(f,r) \
{u32 t[2]; t[0]=((u32*)&r)[1]; t[1]=((u32*)&r)[0]; FP_UNPACK_DP(f,t);}
// 2 args instructions.
#define BOTH_PRmn(op,x) \
FP_DECL_EX; if(FPSCR_PR) op(D,x,DRm,DRn); else op(S,x,FRm,FRn);
#define CMP_X(SZ,R,M,N) do{ \
FP_DECL_##SZ(Fm); FP_DECL_##SZ(Fn); \
UNPACK_##SZ(Fm, M); UNPACK_##SZ(Fn, N); \
FP_CMP_##SZ(R, Fn, Fm, 2); }while(0)
#define EQ_X(SZ,R,M,N) do{ \
FP_DECL_##SZ(Fm); FP_DECL_##SZ(Fn); \
UNPACK_##SZ(Fm, M); UNPACK_##SZ(Fn, N); \
FP_CMP_EQ_##SZ(R, Fn, Fm); }while(0)
#define CMP(OP) ({ int r; BOTH_PRmn(OP##_X,r); r; })
static int
fcmp_gt(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
if (CMP(CMP) > 0)
regs->sr |= 1;
else
regs->sr &= ~1;
return 0;
}
static int
fcmp_eq(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
if (CMP(CMP /*EQ*/) == 0)
regs->sr |= 1;
else
regs->sr &= ~1;
return 0;
}
#define ARITH_X(SZ,OP,M,N) do{ \
FP_DECL_##SZ(Fm); FP_DECL_##SZ(Fn); FP_DECL_##SZ(Fr); \
UNPACK_##SZ(Fm, M); UNPACK_##SZ(Fn, N); \
FP_##OP##_##SZ(Fr, Fn, Fm); \
PACK_##SZ(N, Fr); }while(0)
static int
fadd(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
BOTH_PRmn(ARITH_X, ADD);
return 0;
}
static int
fsub(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
BOTH_PRmn(ARITH_X, SUB);
return 0;
}
static int
fmul(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
BOTH_PRmn(ARITH_X, MUL);
return 0;
}
static int
fdiv(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
BOTH_PRmn(ARITH_X, DIV);
return 0;
}
static int
fmac(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
FP_DECL_EX;
FP_DECL_S(Fr);
FP_DECL_S(Ft);
FP_DECL_S(F0);
FP_DECL_S(Fm);
FP_DECL_S(Fn);
UNPACK_S(F0, FR0);
UNPACK_S(Fm, FRm);
UNPACK_S(Fn, FRn);
FP_MUL_S(Ft, Fm, F0);
FP_ADD_S(Fr, Fn, Ft);
PACK_S(FRn, Fr);
return 0;
}
// to process fmov's extension (odd n for DR access XD).
#define FMOV_EXT(x) if(x&1) x+=16-1
static int
fmov_idx_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(n);
READ(FRn, Rm + R0 + 4);
n++;
READ(FRn, Rm + R0);
} else {
READ(FRn, Rm + R0);
}
return 0;
}
static int
fmov_mem_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(n);
READ(FRn, Rm + 4);
n++;
READ(FRn, Rm);
} else {
READ(FRn, Rm);
}
return 0;
}
static int
fmov_inc_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(n);
READ(FRn, Rm + 4);
n++;
READ(FRn, Rm);
Rm += 8;
} else {
READ(FRn, Rm);
Rm += 4;
}
return 0;
}
static int
fmov_reg_idx(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(m);
WRITE(FRm, Rn + R0 + 4);
m++;
WRITE(FRm, Rn + R0);
} else {
WRITE(FRm, Rn + R0);
}
return 0;
}
static int
fmov_reg_mem(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(m);
WRITE(FRm, Rn + 4);
m++;
WRITE(FRm, Rn);
} else {
WRITE(FRm, Rn);
}
return 0;
}
static int
fmov_reg_dec(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(m);
Rn -= 8;
WRITE(FRm, Rn + 4);
m++;
WRITE(FRm, Rn);
} else {
Rn -= 4;
WRITE(FRm, Rn);
}
return 0;
}
static int
fmov_reg_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
int n)
{
if (FPSCR_SZ) {
FMOV_EXT(m);
FMOV_EXT(n);
DRn = DRm;
} else {
FRn = FRm;
}
return 0;
}
static int
fnop_mn(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, int n)
{
return -EINVAL;
}
// 1 arg instructions.
#define NOTYETn(i) static int i(struct sh_fpu_soft_struct *fregs, int n) \
{ printk( #i " not yet done.\n"); return 0; }
NOTYETn(ftrv)
NOTYETn(fsqrt)
NOTYETn(fipr)
NOTYETn(fsca)
NOTYETn(fsrra)
#define EMU_FLOAT_X(SZ,N) do { \
FP_DECL_##SZ(Fn); \
FP_FROM_INT_##SZ(Fn, FPUL, 32, int); \
PACK_##SZ(N, Fn); }while(0)
static int ffloat(struct sh_fpu_soft_struct *fregs, int n)
{
FP_DECL_EX;
if (FPSCR_PR)
EMU_FLOAT_X(D, DRn);
else
EMU_FLOAT_X(S, FRn);
return 0;
}
#define EMU_FTRC_X(SZ,N) do { \
FP_DECL_##SZ(Fn); \
UNPACK_##SZ(Fn, N); \
FP_TO_INT_##SZ(FPUL, Fn, 32, 1); }while(0)
static int ftrc(struct sh_fpu_soft_struct *fregs, int n)
{
FP_DECL_EX;
if (FPSCR_PR)
EMU_FTRC_X(D, DRn);
else
EMU_FTRC_X(S, FRn);
return 0;
}
static int fcnvsd(struct sh_fpu_soft_struct *fregs, int n)
{
FP_DECL_EX;
FP_DECL_S(Fn);
FP_DECL_D(Fr);
UNPACK_S(Fn, FPUL);
FP_CONV(D, S, 2, 1, Fr, Fn);
PACK_D(DRn, Fr);
return 0;
}
static int fcnvds(struct sh_fpu_soft_struct *fregs, int n)
{
FP_DECL_EX;
FP_DECL_D(Fn);
FP_DECL_S(Fr);
UNPACK_D(Fn, DRn);
FP_CONV(S, D, 1, 2, Fr, Fn);
PACK_S(FPUL, Fr);
return 0;
}
static int fxchg(struct sh_fpu_soft_struct *fregs, int flag)
{
FPSCR ^= flag;
return 0;
}
static int fsts(struct sh_fpu_soft_struct *fregs, int n)
{
FRn = FPUL;
return 0;
}
static int flds(struct sh_fpu_soft_struct *fregs, int n)
{
FPUL = FRn;
return 0;
}
static int fneg(struct sh_fpu_soft_struct *fregs, int n)
{
FRn ^= (1 << (_FP_W_TYPE_SIZE - 1));
return 0;
}
static int fabs(struct sh_fpu_soft_struct *fregs, int n)
{
FRn &= ~(1 << (_FP_W_TYPE_SIZE - 1));
return 0;
}
static int fld0(struct sh_fpu_soft_struct *fregs, int n)
{
FRn = 0;
return 0;
}
static int fld1(struct sh_fpu_soft_struct *fregs, int n)
{
FRn = (_FP_EXPBIAS_S << (_FP_FRACBITS_S - 1));
return 0;
}
static int fnop_n(struct sh_fpu_soft_struct *fregs, int n)
{
return -EINVAL;
}
/// Instruction decoders.
static int id_fxfd(struct sh_fpu_soft_struct *, int);
static int id_fnxd(struct sh_fpu_soft_struct *, struct pt_regs *, int, int);
static int (*fnxd[])(struct sh_fpu_soft_struct *, int) = {
fsts, flds, ffloat, ftrc, fneg, fabs, fsqrt, fsrra,
fld0, fld1, fcnvsd, fcnvds, fnop_n, fnop_n, fipr, id_fxfd
};
static int (*fnmx[])(struct sh_fpu_soft_struct *, struct pt_regs *, int, int) = {
fadd, fsub, fmul, fdiv, fcmp_eq, fcmp_gt, fmov_idx_reg, fmov_reg_idx,
fmov_mem_reg, fmov_inc_reg, fmov_reg_mem, fmov_reg_dec,
fmov_reg_reg, id_fnxd, fmac, fnop_mn};
static int id_fxfd(struct sh_fpu_soft_struct *fregs, int x)
{
const int flag[] = { FPSCR_SZ, FPSCR_PR, FPSCR_FR, 0 };
switch (x & 3) {
case 3:
fxchg(fregs, flag[x >> 2]);
break;
case 1:
ftrv(fregs, x - 1);
break;
default:
fsca(fregs, x);
}
return 0;
}
static int
id_fnxd(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int x, int n)
{
return (fnxd[x])(fregs, n);
}
static int
id_fnmx(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, u16 code)
{
int n = (code >> 8) & 0xf, m = (code >> 4) & 0xf, x = code & 0xf;
return (fnmx[x])(fregs, regs, m, n);
}
static int
id_sys(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, u16 code)
{
int n = ((code >> 8) & 0xf);
unsigned long *reg = (code & 0x0010) ? &FPUL : &FPSCR;
switch (code & 0xf0ff) {
case 0x005a:
case 0x006a:
Rn = *reg;
break;
case 0x405a:
case 0x406a:
*reg = Rn;
break;
case 0x4052:
case 0x4062:
Rn -= 4;
WRITE(*reg, Rn);
break;
case 0x4056:
case 0x4066:
READ(*reg, Rn);
Rn += 4;
break;
default:
return -EINVAL;
}
return 0;
}
static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_regs *regs)
{
if ((code & 0xf000) == 0xf000)
return id_fnmx(fregs, regs, code);
else
return id_sys(fregs, regs, code);
}
/**
* denormal_to_double - Given denormalized float number,
* store double float
*
* @fpu: Pointer to sh_fpu_hard structure
* @n: Index to FP register
*/
static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
{
unsigned long du, dl;
unsigned long x = fpu->fpul;
int exp = 1023 - 126;
if (x != 0 && (x & 0x7f800000) == 0) {
du = (x & 0x80000000);
while ((x & 0x00800000) == 0) {
x <<= 1;
exp--;
}
x &= 0x007fffff;
du |= (exp << 20) | (x >> 3);
dl = x << 29;
fpu->fp_regs[n] = du;
fpu->fp_regs[n+1] = dl;
}
}
/**
* ieee_fpe_handler - Handle denormalized number exception
*
* @regs: Pointer to register structure
*
* Returns 1 when it's handled (should not cause exception).
*/
static int ieee_fpe_handler(struct pt_regs *regs)
{
unsigned short insn = *(unsigned short *)regs->pc;
unsigned short finsn;
unsigned long nextpc;
siginfo_t info;
int nib[4] = {
(insn >> 12) & 0xf,
(insn >> 8) & 0xf,
(insn >> 4) & 0xf,
insn & 0xf};
if (nib[0] == 0xb ||
(nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
regs->pr = regs->pc + 4;
if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
finsn = *(unsigned short *) (regs->pc + 2);
} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
if (regs->sr & 1)
nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
else
nextpc = regs->pc + 4;
finsn = *(unsigned short *) (regs->pc + 2);
} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
if (regs->sr & 1)
nextpc = regs->pc + 4;
else
nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
finsn = *(unsigned short *) (regs->pc + 2);
} else if (nib[0] == 0x4 && nib[3] == 0xb &&
(nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
nextpc = regs->regs[nib[1]];
finsn = *(unsigned short *) (regs->pc + 2);
} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
(nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
nextpc = regs->pc + 4 + regs->regs[nib[1]];
finsn = *(unsigned short *) (regs->pc + 2);
} else if (insn == 0x000b) { /* rts */
nextpc = regs->pr;
finsn = *(unsigned short *) (regs->pc + 2);
} else {
nextpc = regs->pc + 2;
finsn = insn;
}
if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
struct task_struct *tsk = current;
if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) {
/* FPU error */
denormal_to_double (&tsk->thread.fpu.hard,
(finsn >> 8) & 0xf);
tsk->thread.fpu.hard.fpscr &=
~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
task_thread_info(tsk)->status |= TS_USEDFPU;
} else {
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_code = FPE_FLTINV;
info.si_addr = (void __user *)regs->pc;
force_sig_info(SIGFPE, &info, tsk);
}
regs->pc = nextpc;
return 1;
}
return 0;
}
asmlinkage void do_fpu_error(unsigned long r4, unsigned long r5,
unsigned long r6, unsigned long r7,
struct pt_regs regs)
{
struct task_struct *tsk = current;
siginfo_t info;
if (ieee_fpe_handler (&regs))
return;
regs.pc += 2;
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_code = FPE_FLTINV;
info.si_addr = (void __user *)regs.pc;
force_sig_info(SIGFPE, &info, tsk);
}
/**
* fpu_init - Initialize FPU registers
* @fpu: Pointer to software emulated FPU registers.
*/
static void fpu_init(struct sh_fpu_soft_struct *fpu)
{
int i;
fpu->fpscr = FPSCR_INIT;
fpu->fpul = 0;
for (i = 0; i < 16; i++) {
fpu->fp_regs[i] = 0;
fpu->xfp_regs[i]= 0;
}
}
/**
* do_fpu_inst - Handle reserved instructions for FPU emulation
* @inst: instruction code.
* @regs: registers on stack.
*/
int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct sh_fpu_soft_struct *fpu = &(tsk->thread.fpu.soft);
if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
/* initialize once. */
fpu_init(fpu);
task_thread_info(tsk)->status |= TS_USEDFPU;
}
return fpu_emulate(inst, fpu, regs);
}