xtensa: fix fast_syscall_spill_registers

The original implementation could clobber registers under certain conditions.

The Xtensa processor architecture uses windowed registers. The original
implementation used a4 as a temporary register, which under certain
conditions could be register a0 of the oldest window frame, and the content
of that register was not always restored correctly.

Moving the _spill_registers routine inside the fast system call handler frees
up one more register for the spill routine (the return address is no longer
required).

Signed-off-by: Chris Zankel <chris@zankel.net>
This commit is contained in:
Chris Zankel 2014-01-28 22:09:51 -08:00
parent 3251f1e27a
commit 6b5a1f74e5
1 changed files with 175 additions and 210 deletions

View File

@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers)
rsr a0, sar
s32i a3, a2, PT_AREG3
s32i a0, a2, PT_SAR
/* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */
s32i a4, a2, PT_AREG4
s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5
/* The spill routine might clobber a7, a11, and a15. */
s32i a7, a2, PT_AREG7
s32i a8, a2, PT_AREG8
s32i a11, a2, PT_AREG11
s32i a12, a2, PT_AREG12
s32i a15, a2, PT_AREG15
call0 _spill_registers # destroys a3, a4, and SAR
/*
* Rotate ws so that the current windowbase is at bit 0.
* Assume ws = xxxwww1yy (www1 current window frame).
* Rotate ws right so that the low WSBITS bits of a3 = yyxxxwww1.
*/
rsr a0, windowbase
rsr a3, windowstart # a3 = xxxwww1yy
ssr a0 # holds WB
slli a0, a3, WSBITS
or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy
srl a3, a3 # a3 = 00xxxwww1yyxxxwww1
/* We are done if there are no more than the current register frame. */
extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww
movi a0, (1 << (WSBITS-1))
_beqz a3, .Lnospill # only one active frame? jump
/* We want 1 at the top, so that we return to the current windowbase */
or a3, a3, a0 # 1yyxxxwww
/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
wsr a3, windowstart # save shifted windowstart
neg a0, a3
and a3, a0, a3 # first bit set from right: 000010000
ffs_ws a0, a3 # a0: shifts to skip empty frames
movi a3, WSBITS
sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right
ssr a0 # save in SAR for later.
rsr a3, windowbase
add a3, a3, a0
wsr a3, windowbase
rsync
rsr a3, windowstart
srl a3, a3 # shift windowstart
/* WB is now just one frame below the oldest frame in the register
window. WS is shifted so the oldest frame is in bit 0, thus, WB
and WS differ by one 4-register frame. */
/* Save frames. Depending on what call was used (call4, call8, call12),
* we have to save 4, 8, or 12 registers.
*/
.Lloop: _bbsi.l a3, 1, .Lc4
_bbci.l a3, 2, .Lc12
.Lc8: s32e a4, a13, -16
l32e a4, a5, -12
s32e a8, a4, -32
s32e a5, a13, -12
s32e a6, a13, -8
s32e a7, a13, -4
s32e a9, a4, -28
s32e a10, a4, -24
s32e a11, a4, -20
srli a11, a3, 2 # shift windowbase by 2
rotw 2
_bnei a3, 1, .Lloop
j .Lexit
.Lc4: s32e a4, a9, -16
s32e a5, a9, -12
s32e a6, a9, -8
s32e a7, a9, -4
srli a7, a3, 1
rotw 1
_bnei a3, 1, .Lloop
j .Lexit
.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero!
/* 12-register frame (call12) */
l32e a0, a5, -12
s32e a8, a0, -48
mov a8, a0
s32e a9, a8, -44
s32e a10, a8, -40
s32e a11, a8, -36
s32e a12, a8, -32
s32e a13, a8, -28
s32e a14, a8, -24
s32e a15, a8, -20
srli a15, a3, 3
/* The stack pointer for a4..a7 is out of reach, so we rotate the
* window, grab the stackpointer, and rotate back.
* Alternatively, we could also use the following approach, but that
* makes the fixup routine much more complicated:
* rotw 1
* s32e a0, a13, -16
* ...
* rotw 2
*/
rotw 1
mov a4, a13
rotw -1
s32e a4, a8, -16
s32e a5, a8, -12
s32e a6, a8, -8
s32e a7, a8, -4
rotw 3
_beqi a3, 1, .Lexit
j .Lloop
.Lexit:
/* Done. Do the final rotation and set WS */
rotw 1
rsr a3, windowbase
ssl a3
movi a3, 1
sll a3, a3
wsr a3, windowstart
.Lnospill:
/* Advance PC, restore registers and SAR, and return from exception. */
l32i a3, a2, PT_AREG5
l32i a4, a2, PT_AREG4
l32i a3, a2, PT_SAR
l32i a0, a2, PT_AREG0
wsr a3, sar
l32i a3, a2, PT_AREG3
/* Restore clobbered registers. */
l32i a4, a2, PT_AREG4
l32i a7, a2, PT_AREG7
l32i a8, a2, PT_AREG8
l32i a11, a2, PT_AREG11
l32i a12, a2, PT_AREG12
l32i a15, a2, PT_AREG15
movi a2, 0
rfe
.Linvalid_mask:
/* We get here because of an unrecoverable error in the window
* registers, so set up a dummy frame and kill the user application.
* Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
*/
movi a0, 1
movi a1, 0
wsr a0, windowstart
wsr a1, windowbase
rsync
movi a0, 0
rsr a3, excsave1
l32i a1, a3, EXC_TABLE_KSTK
movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL
wsr a4, ps
rsync
movi a6, SIGSEGV
movi a4, do_exit
callx4 a4
/* shouldn't return, so panic */
wsr a0, excsave1
movi a0, unrecoverable_exception
callx0 a0 # should not return
1: j 1b
ENDPROC(fast_syscall_spill_registers)
/* Fixup handler.
@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return)
ENDPROC(fast_syscall_spill_registers_fixup_return)
/*
* _spill_registers: spill all register-window frames other than the
* current one to their spill areas.
*
* This is not a real function. The following conditions must be met:
*
* - must be called with call0 (it returns with 'ret').
* - uses a3, a4 and SAR.
* - the last 'valid' register of each frame is clobbered.
* - the caller must have registered a fixup handler
* (or be inside a critical section)
* - PS_EXCM must be set (PS_WOE cleared?)
*
* If only the current frame is active, the routine returns right away
* (.Lnospill) without writing WINDOWSTART. Otherwise it leaves through
* .Lexit, which sets WINDOWSTART to the single bit of the current
* WINDOWBASE. An inconsistent WINDOWSTART mask ends in .Linvalid_mask,
* which does not return to the caller.
*/
ENTRY(_spill_registers)
/*
* Rotate ws so that the current windowbase is at bit 0.
* Assume ws = xxxwww1yy (www1 current window frame).
* Rotate ws right so that the low WSBITS bits of a3 = yyxxxwww1.
*/
rsr a4, windowbase
rsr a3, windowstart # a3 = xxxwww1yy
ssr a4 # SAR = WB (shift amount for srl below)
slli a4, a3, WSBITS
or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy
srl a3, a3 # a3 = 00xxxwww1yyxxxwww1
/* We are done if there are no more than the current register frame. */
extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww
movi a4, (1 << (WSBITS-1))
_beqz a3, .Lnospill # only one active frame? jump
/* We want 1 at the top, so that we return to the current windowbase */
or a3, a3, a4 # 1yyxxxwww
/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
wsr a3, windowstart # save shifted windowstart
neg a4, a3
and a3, a4, a3 # first bit set from right: 000010000
ffs_ws a4, a3 # a4: shifts to skip empty frames
movi a3, WSBITS
sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right
ssr a4 # save in SAR for later.
/* Advance WINDOWBASE by the number of empty frames counted in a4. */
rsr a3, windowbase
add a3, a3, a4
wsr a3, windowbase
rsync
/* Re-read the rotated mask saved above and shift out the empty frames. */
rsr a3, windowstart
srl a3, a3 # shift windowstart
/* WB is now just one frame below the oldest frame in the register
window. WS is shifted so the oldest frame is in bit 0, thus, WB
and WS differ by one 4-register frame. */
/* Save frames. Depending on what call was used (call4, call8, call12),
* we have to save 4, 8, or 12 registers.
*/
/* First frame: bit 1 set selects the call4 path, bit 2 set the call8
* path; otherwise bit 3 must be set (call12).
*/
_bbsi.l a3, 1, .Lc4
_bbsi.l a3, 2, .Lc8
/* Special case: we have a call12-frame starting at a4. */
_bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (Jump to Lc12 first)
/* a4 is borrowed as a scratch register here: it is parked at a1-16,
* used to hold the pointer loaded from a5-12, and reloaded from a1-16
* before joining the common call12 code at .Lc12c.
*/
s32e a4, a1, -16 # a1 is valid with an empty spill area
l32e a4, a5, -12
s32e a8, a4, -48
mov a8, a4
l32e a4, a1, -16
j .Lc12c
.Lnospill:
/* Nothing to spill: return to the call0 caller. */
ret
/* Main loop: a3 holds the remaining (shifted) WINDOWSTART bits. */
.Lloop: _bbsi.l a3, 1, .Lc4
_bbci.l a3, 2, .Lc12
/* 8-register frame (call8); a4 is used as a temporary after it has
* been stored.
*/
.Lc8: s32e a4, a13, -16
l32e a4, a5, -12
s32e a8, a4, -32
s32e a5, a13, -12
s32e a6, a13, -8
s32e a7, a13, -4
s32e a9, a4, -28
s32e a10, a4, -24
s32e a11, a4, -20
srli a11, a3, 2 # drop 2 WS bits; a11 becomes a3 after rotw 2
rotw 2
_bnei a3, 1, .Lloop
.Lexit: /* Done. Do the final rotation, set WS, and return. */
rotw 1
rsr a3, windowbase
ssl a3
movi a3, 1
sll a3, a3 # a3 = 1 << WB
wsr a3, windowstart
ret
/* 4-register frame (call4) */
.Lc4: s32e a4, a9, -16
s32e a5, a9, -12
s32e a6, a9, -8
s32e a7, a9, -4
srli a7, a3, 1 # drop 1 WS bit; a7 becomes a3 after rotw 1
rotw 1
_bnei a3, 1, .Lloop
j .Lexit
.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 3 shouldn't be zero!
/* 12-register frame (call12) */
l32e a2, a5, -12
s32e a8, a2, -48
mov a8, a2
.Lc12c: s32e a9, a8, -44
s32e a10, a8, -40
s32e a11, a8, -36
s32e a12, a8, -32
s32e a13, a8, -28
s32e a14, a8, -24
s32e a15, a8, -20
srli a15, a3, 3 # drop 3 WS bits; a15 becomes a3 after rotw 3
/* The stack pointer for a4..a7 is out of reach, so we rotate the
* window, grab the stackpointer, and rotate back.
* Alternatively, we could also use the following approach, but that
* makes the fixup routine much more complicated:
* rotw 1
* s32e a0, a13, -16
* ...
* rotw 2
*/
rotw 1
/* a5 in the rotated window is a9 of this window; a9 is the base
* register of the four stores below.
*/
mov a5, a13
rotw -1
s32e a4, a9, -16
s32e a5, a9, -12
s32e a6, a9, -8
s32e a7, a9, -4
rotw 3
_beqi a3, 1, .Lexit
j .Lloop
.Linvalid_mask:
/* We get here because of an unrecoverable error in the window
* registers. If we are in user space, we kill the application,
* however, this condition is unrecoverable in kernel space.
*/
rsr a0, ps
_bbci.l a0, PS_UM_BIT, 1f
/* User space: Setup a dummy frame and kill application.
* Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
*/
/* Reset the window state: WINDOWSTART = 1, WINDOWBASE = 0. */
movi a0, 1
movi a1, 0
wsr a0, windowstart
wsr a1, windowbase
rsync
movi a0, 0
/* Load a1 from the EXC_TABLE_KSTK slot of the table in excsave1. */
rsr a3, excsave1
l32i a1, a3, EXC_TABLE_KSTK
movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL
wsr a4, ps
rsync
/* callx4 makes a6 the callee's a2, so do_exit receives SIGSEGV. */
movi a6, SIGSEGV
movi a4, do_exit
callx4 a4
/* If do_exit returns, execution falls through to the panic path. */
1: /* Kernel space: PANIC! */
wsr a0, excsave1
movi a0, unrecoverable_exception
callx0 a0 # should not return
1: j 1b
ENDPROC(_spill_registers)
#ifdef CONFIG_MMU
/*
* We should never get here. Bail out!