target-arm: Implement WFE as a yield operation

Implement WFE to yield our timeslice to the next CPU.
This avoids slowdowns in multicore configurations caused
by one core busy-waiting on a spinlock which can't possibly
be unlocked until the other core has an opportunity to run.
This speeds up my test case A15 dual-core boot by a factor
of three (though it is still four or five times slower than
a single-core boot).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1393339545-22111-1-git-send-email-peter.maydell@linaro.org
Reviewed-by: Richard Henderson <rth@twiddle.net>
Tested-by: Rob Herring <rob.herring@linaro.org>
This commit is contained in:
Peter Maydell 2014-03-10 14:56:30 +00:00
parent 2b194951c5
commit 72c1d3af6e
5 changed files with 19 additions and 0 deletions

View File

@ -59,6 +59,7 @@ typedef uint64_t target_ulong;
#define EXCP_HLT 0x10001 /* hlt instruction reached */ #define EXCP_HLT 0x10001 /* hlt instruction reached */
#define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ #define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */
#define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */
#define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */
#define TB_JMP_CACHE_BITS 12 #define TB_JMP_CACHE_BITS 12
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)

View File

@ -50,6 +50,7 @@ DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
i32, i32, i32, i32) i32, i32, i32, i32)
DEF_HELPER_2(exception, void, env, i32) DEF_HELPER_2(exception, void, env, i32)
DEF_HELPER_1(wfi, void, env) DEF_HELPER_1(wfi, void, env)
DEF_HELPER_1(wfe, void, env)
DEF_HELPER_3(cpsr_write, void, env, i32, i32) DEF_HELPER_3(cpsr_write, void, env, i32, i32)
DEF_HELPER_1(cpsr_read, i32, env) DEF_HELPER_1(cpsr_read, i32, env)

View File

@ -225,6 +225,15 @@ void HELPER(wfi)(CPUARMState *env)
cpu_loop_exit(env); cpu_loop_exit(env);
} }
/* Helper for the WFE hint instruction.
 *
 * WFE is treated as a request to give up the current timeslice, not as
 * a halt: the helper records EXCP_YIELD ("cpu wants to yield timeslice
 * to another") in env->exception_index and then leaves the execution
 * loop through cpu_loop_exit().
 */
void HELPER(wfe)(CPUARMState *env)
{
/* Don't actually halt the CPU, just yield back to the top
 * level loop.
 */
env->exception_index = EXCP_YIELD;
cpu_loop_exit(env);
}
void HELPER(exception)(CPUARMState *env, uint32_t excp) void HELPER(exception)(CPUARMState *env, uint32_t excp)
{ {
env->exception_index = excp; env->exception_index = excp;

View File

@ -3939,6 +3939,9 @@ static void gen_nop_hint(DisasContext *s, int val)
s->is_jmp = DISAS_WFI; s->is_jmp = DISAS_WFI;
break; break;
case 2: /* wfe */ case 2: /* wfe */
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_WFE;
break;
case 4: /* sev */ case 4: /* sev */
case 5: /* sevl */ case 5: /* sevl */
/* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */ /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */
@ -10857,6 +10860,9 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
case DISAS_WFI: case DISAS_WFI:
gen_helper_wfi(cpu_env); gen_helper_wfi(cpu_env);
break; break;
case DISAS_WFE:
gen_helper_wfe(cpu_env);
break;
case DISAS_SWI: case DISAS_SWI:
gen_exception(EXCP_SWI); gen_exception(EXCP_SWI);
break; break;

View File

@ -44,6 +44,8 @@ extern TCGv_ptr cpu_env;
* emitting unreachable code at the end of the TB in the A64 decoder * emitting unreachable code at the end of the TB in the A64 decoder
*/ */
#define DISAS_EXC 6 #define DISAS_EXC 6
/* WFE */
#define DISAS_WFE 7
#ifdef TARGET_AARCH64 #ifdef TARGET_AARCH64
void a64_translate_init(void); void a64_translate_init(void);