tcg-ppc: use new return-argument ld/st helpers

These use a 32-bit load-of-immediate to save a mflr+addi+mtlr sequence.
Tested with a Windows 98 guest (pretty much the most recent thing I
could run on my PPC machine) and kvm-unit-tests's sieve.flat.  The
speed up for sieve.flat is as high as 10% for qemu-system-i386, 25%
(no kidding) for qemu-system-x86_64 on my PowerBook G4.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Paolo Bonzini 2013-09-05 10:22:09 +02:00 committed by Richard Henderson
parent 6a11557988
commit 619f90ba62
2 changed files with 21 additions and 24 deletions

View File

@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr;
In some implementations, we pass the "logical" return address manually;
in others, we must infer the logical return from the true return. */
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4))
# elif defined(__arm__)
# if defined(__arm__)
/* We define two insns between the return address and the branch back to
straight-line. Find and decode that branch insn. */
# define GETRA_LDST(RA) tcg_getra_ldst(RA)

View File

@ -550,22 +550,24 @@ static void add_qemu_ldst_label (TCGContext *s,
label->label_ptr[0] = label_ptr;
}
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra)
*/
static const void * const qemu_ld_helpers[4] = {
helper_ldb_mmu,
helper_ldw_mmu,
helper_ldl_mmu,
helper_ldq_mmu,
helper_ret_ldub_mmu,
helper_ret_lduw_mmu,
helper_ret_ldul_mmu,
helper_ret_ldq_mmu,
};
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
uintxx_t val, int mmu_idx) */
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, int mmu_idx, uintptr_t ra)
*/
static const void * const qemu_st_helpers[4] = {
helper_stb_mmu,
helper_stw_mmu,
helper_stl_mmu,
helper_stq_mmu,
helper_ret_stb_mmu,
helper_ret_stw_mmu,
helper_ret_stl_mmu,
helper_ret_stq_mmu,
};
static void *ld_trampolines[4];
@ -860,9 +862,9 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]);
tcg_out32 (s, (tcg_target_long) raddr);
switch (opc) {
case 0|4:
tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
@ -954,10 +956,10 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
}
ir++;
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]);
tcg_out32 (s, (tcg_target_long) raddr);
tcg_out_b (s, 0, (tcg_target_long) raddr);
tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
tcg_out32 (s, MTSPR | RS (ir) | LR);
tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]);
}
void tcg_out_tb_finalize(TCGContext *s)
@ -981,9 +983,6 @@ void tcg_out_tb_finalize(TCGContext *s)
#ifdef CONFIG_SOFTMMU
static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
{
tcg_out32 (s, MFSPR | RT (3) | LR);
tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
tcg_out32 (s, MTSPR | RS (3) | LR);
tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
tcg_out_call (s, (tcg_target_long) ptr, 1, 0);
}