7604537bbb
Current implementation of cpu_{suspend}/cpu_{resume} relies on the MPIDR to index the array of pointers where the context is saved and restored. The current approach works as long as the MPIDR can be considered a linear index, so that the pointers array can simply be dereferenced by using the MPIDR[7:0] value. On ARM multi-cluster systems, where the MPIDR may not be a linear index, to properly dereference the stack pointer array, a mapping function should be applied to it so that it can be used for arrays look-ups. This patch adds code in the cpu_{suspend}/cpu_{resume} implementation that relies on shifting and ORing hashing method to map a MPIDR value to a set of buckets precomputed at boot to have a collision free mapping from MPIDR to context pointers. The hashing algorithm must be simple, fast, and implementable with few instructions since in the cpu_resume path the mapping is carried out with the MMU off and the I-cache off, hence code and data are fetched from DRAM with no-caching available. Simplicity is counterbalanced with a little increase of memory (allocated dynamically) for stack pointers buckets, that should be anyway fairly limited on most systems. Memory for context pointers is allocated in a early_initcall with size precomputed and stashed previously in kernel data structures. Memory for context pointers is allocated through kmalloc; this guarantees contiguous physical addresses for the allocated memory which is fundamental to the correct functioning of the resume mechanism that relies on the context pointer array to be a chunk of contiguous physical memory. Virtual to physical address conversion for the context pointer array base is carried out at boot to avoid fiddling with virt_to_phys conversions in the cpu_resume path which is quite fragile and should be optimized to execute as few instructions as possible. Virtual and physical context pointer base array addresses are stashed in a struct that is accessible from assembly using values generated through the asm-offsets.c mechanism. Cc: Will Deacon <will.deacon@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Russell King <linux@arm.linux.org.uk> Cc: Colin Cross <ccross@android.com> Cc: Santosh Shilimkar <santosh.shilimkar@ti.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Amit Kucheria <amit.kucheria@linaro.org> Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Reviewed-by: Dave Martin <Dave.Martin@arm.com> Reviewed-by: Nicolas Pitre <nico@linaro.org> Tested-by: Shawn Guo <shawn.guo@linaro.org> Tested-by: Kevin Hilman <khilman@linaro.org> Tested-by: Stephen Warren <swarren@wwwdotorg.org>
166 lines
4.8 KiB
ArmAsm
166 lines
4.8 KiB
ArmAsm
#include <linux/linkage.h>
|
|
#include <linux/threads.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/glue-cache.h>
|
|
#include <asm/glue-proc.h>
|
|
.text
|
|
|
|
/*
|
|
* Implementation of MPIDR hash algorithm through shifting
|
|
* and OR'ing.
|
|
*
|
|
* @dst: register containing hash result
|
|
* @rs0: register containing affinity level 0 bit shift
|
|
* @rs1: register containing affinity level 1 bit shift
|
|
* @rs2: register containing affinity level 2 bit shift
|
|
* @mpidr: register containing MPIDR value
|
|
* @mask: register containing MPIDR mask
|
|
*
|
|
* Pseudo C-code:
|
|
*
|
|
*u32 dst;
|
|
*
|
|
*compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) {
|
|
* u32 aff0, aff1, aff2;
|
|
* u32 mpidr_masked = mpidr & mask;
|
|
* aff0 = mpidr_masked & 0xff;
|
|
* aff1 = mpidr_masked & 0xff00;
|
|
* aff2 = mpidr_masked & 0xff0000;
|
|
* dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2);
|
|
*}
|
|
* Input registers: rs0, rs1, rs2, mpidr, mask
|
|
* Output register: dst
|
|
* Note: input and output registers must be disjoint register sets
|
|
(eg: a macro instance with mpidr = r1 and dst = r1 is invalid)
|
|
*/
|
|
.macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask
|
|
and \mpidr, \mpidr, \mask @ mask out MPIDR bits
|
|
and \dst, \mpidr, #0xff @ mask=aff0
|
|
ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0
|
|
THUMB( lsr \dst, \dst, \rs0 )
|
|
and \mask, \mpidr, #0xff00 @ mask = aff1
|
|
ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1)
|
|
THUMB( lsr \mask, \mask, \rs1 )
|
|
THUMB( orr \dst, \dst, \mask )
|
|
and \mask, \mpidr, #0xff0000 @ mask = aff2
|
|
ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2)
|
|
THUMB( lsr \mask, \mask, \rs2 )
|
|
THUMB( orr \dst, \dst, \mask )
|
|
.endm
|
|
|
|
/*
|
|
* Save CPU state for a suspend. This saves the CPU general purpose
|
|
* registers, and allocates space on the kernel stack to save the CPU
|
|
* specific registers and some other data for resume.
|
|
* r0 = suspend function arg0
|
|
* r1 = suspend function
|
|
*/
|
|
ENTRY(__cpu_suspend)
|
|
stmfd sp!, {r4 - r11, lr}
|
|
#ifdef MULTI_CPU
|
|
ldr r10, =processor
|
|
ldr r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
|
|
#else
|
|
ldr r4, =cpu_suspend_size
|
|
#endif
|
|
mov r5, sp @ current virtual SP
|
|
add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn
|
|
sub sp, sp, r4 @ allocate CPU state on stack
|
|
stmfd sp!, {r0, r1} @ save suspend func arg and pointer
|
|
add r0, sp, #8 @ save pointer to save block
|
|
mov r1, r4 @ size of save block
|
|
mov r2, r5 @ virtual SP
|
|
ldr r3, =sleep_save_sp
|
|
ldr r3, [r3, #SLEEP_SAVE_SP_VIRT]
|
|
ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
|
|
ALT_UP_B(1f)
|
|
ldr r8, =mpidr_hash
|
|
/*
|
|
* This ldmia relies on the memory layout of the mpidr_hash
|
|
* struct mpidr_hash.
|
|
*/
|
|
ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
|
|
compute_mpidr_hash lr, r5, r6, r7, r9, r4
|
|
add r3, r3, lr, lsl #2
|
|
1:
|
|
bl __cpu_suspend_save
|
|
adr lr, BSYM(cpu_suspend_abort)
|
|
ldmfd sp!, {r0, pc} @ call suspend fn
|
|
ENDPROC(__cpu_suspend)
|
|
.ltorg
|
|
|
|
cpu_suspend_abort:
|
|
ldmia sp!, {r1 - r3} @ pop phys pgd, virt SP, phys resume fn
|
|
teq r0, #0
|
|
moveq r0, #1 @ force non-zero value
|
|
mov sp, r2
|
|
ldmfd sp!, {r4 - r11, pc}
|
|
ENDPROC(cpu_suspend_abort)
|
|
|
|
/*
|
|
* r0 = control register value
|
|
*/
|
|
.align 5
|
|
.pushsection .idmap.text,"ax"
|
|
ENTRY(cpu_resume_mmu)
|
|
ldr r3, =cpu_resume_after_mmu
|
|
instr_sync
|
|
mcr p15, 0, r0, c1, c0, 0 @ turn on MMU, I-cache, etc
|
|
mrc p15, 0, r0, c0, c0, 0 @ read id reg
|
|
instr_sync
|
|
mov r0, r0
|
|
mov r0, r0
|
|
mov pc, r3 @ jump to virtual address
|
|
ENDPROC(cpu_resume_mmu)
|
|
.popsection
|
|
cpu_resume_after_mmu:
|
|
bl cpu_init @ restore the und/abt/irq banked regs
|
|
mov r0, #0 @ return zero on success
|
|
ldmfd sp!, {r4 - r11, pc}
|
|
ENDPROC(cpu_resume_after_mmu)
|
|
|
|
/*
|
|
* Note: Yes, part of the following code is located into the .data section.
|
|
* This is to allow sleep_save_sp to be accessed with a relative load
|
|
* while we can't rely on any MMU translation. We could have put
|
|
* sleep_save_sp in the .text section as well, but some setups might
|
|
* insist on it to be truly read-only.
|
|
*/
|
|
.data
|
|
.align
|
|
ENTRY(cpu_resume)
|
|
mov r1, #0
|
|
ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
|
|
ALT_UP_B(1f)
|
|
adr r2, mpidr_hash_ptr
|
|
ldr r3, [r2]
|
|
add r2, r2, r3 @ r2 = struct mpidr_hash phys address
|
|
/*
|
|
* This ldmia relies on the memory layout of the mpidr_hash
|
|
* struct mpidr_hash.
|
|
*/
|
|
ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
|
|
compute_mpidr_hash r1, r4, r5, r6, r0, r3
|
|
1:
|
|
adr r0, _sleep_save_sp
|
|
ldr r0, [r0, #SLEEP_SAVE_SP_PHYS]
|
|
ldr r0, [r0, r1, lsl #2]
|
|
|
|
setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off
|
|
@ load phys pgd, stack, resume fn
|
|
ARM( ldmia r0!, {r1, sp, pc} )
|
|
THUMB( ldmia r0!, {r1, r2, r3} )
|
|
THUMB( mov sp, r2 )
|
|
THUMB( bx r3 )
|
|
ENDPROC(cpu_resume)
|
|
|
|
.align 2
|
|
mpidr_hash_ptr:
|
|
.long mpidr_hash - . @ mpidr_hash struct offset
|
|
|
|
.type sleep_save_sp, #object
|
|
ENTRY(sleep_save_sp)
|
|
_sleep_save_sp:
|
|
.space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp
|