common-user: Re-enable ppc32 host

tcg: Avoid recursion in tcg_gen_mulu2_i32
 tcg: Mark tcg helpers noinline to avoid an issue with LTO
 tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
 disas: Enable loongarch disassembler, and fixes
 tcg/loongarch64: Improve move immediate
 tcg/loongarch64: Improve add immediate
 tcg/loongarch64: Improve setcond
 tcg/loongarch64: Implement movcond
 tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
 tcg/loongarch64: Reorg goto_tb implementation
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmPPO+0dHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV93jwgAhG+H5XHtJqF2isCc
 a6pYuUWRbhsOFL23FmWKx2O41tHlJ2Seort8M9eIHOu21L9DUJFd291O/4ckiMQM
 13+KH/Kl5fumM+uEkO9YMyplOddmvygdTd5dCi5y349Gi3CgJH3n4HUl0qnioM/7
 Dy3n8JIvYsBp+8jUsLXo1gSl5P1kLMLwJmP68qgy8z8Xly4bDco1Nb2UKb7qKevO
 lMr6L+2/ALbKLZ6OU50erdUrlbgNs0eiQyJAfJ47SQ57RGuqF4pZ09+9yRI2FPZt
 UlSn+srsec1ieYyM2e5krVWbNcXaj6FouV7CkbgFXoUZt29xA1HTXsso+8vLgDPu
 g8vvuw==
 =Up0b
 -----END PGP SIGNATURE-----

Merge tag 'pull-tcg-20230123' of https://gitlab.com/rth7680/qemu into staging

common-user: Re-enable ppc32 host
tcg: Avoid recursion in tcg_gen_mulu2_i32
tcg: Mark tcg helpers noinline to avoid an issue with LTO
tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
disas: Enable loongarch disassembler, and fixes
tcg/loongarch64: Improve move immediate
tcg/loongarch64: Improve add immediate
tcg/loongarch64: Improve setcond
tcg/loongarch64: Implement movcond
tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
tcg/loongarch64: Reorg goto_tb implementation

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmPPO+0dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV93jwgAhG+H5XHtJqF2isCc
# a6pYuUWRbhsOFL23FmWKx2O41tHlJ2Seort8M9eIHOu21L9DUJFd291O/4ckiMQM
# 13+KH/Kl5fumM+uEkO9YMyplOddmvygdTd5dCi5y349Gi3CgJH3n4HUl0qnioM/7
# Dy3n8JIvYsBp+8jUsLXo1gSl5P1kLMLwJmP68qgy8z8Xly4bDco1Nb2UKb7qKevO
# lMr6L+2/ALbKLZ6OU50erdUrlbgNs0eiQyJAfJ47SQ57RGuqF4pZ09+9yRI2FPZt
# UlSn+srsec1ieYyM2e5krVWbNcXaj6FouV7CkbgFXoUZt29xA1HTXsso+8vLgDPu
# g8vvuw==
# =Up0b
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 24 Jan 2023 02:01:17 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20230123' of https://gitlab.com/rth7680/qemu:
  tcg/loongarch64: Reorg goto_tb implementation
  tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
  tcg/loongarch64: Implement movcond
  tcg/loongarch64: Improve setcond expansion
  tcg/loongarch64: Introduce tcg_out_addi
  tcg/loongarch64: Update tcg-insn-defs.c.inc
  tcg/loongarch64: Optimize immediate loading
  target/loongarch: Disassemble pcadd* addresses
  target/loongarch: Disassemble jirl properly
  target/loongarch: Enable the disassembler for host tcg
  tcg: Mark tcg helpers noinline to avoid an issue with LTO
  linux-user: Implement host/ppc/host-signal.h
  common-user/host/ppc: Implement safe-syscall.inc.S
  tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
  tcg: Avoid recursion in tcg_gen_mulu2_i32

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2023-02-03 09:30:45 +00:00
commit bf4460a8d9
18 changed files with 497 additions and 170 deletions

View File

@ -0,0 +1,107 @@
/*
* safe-syscall.inc.S : host-specific assembly fragment
* to handle signals occurring at the same time as system calls.
* This is intended to be included by common-user/safe-syscall.S
*
* Copyright (C) 2022 Linaro, Ltd.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
/*
* Standardize on the _CALL_FOO symbols used by GCC:
* Apple XCode does not define _CALL_DARWIN.
* Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
*/
/*
 * If the compiler supplied none of the _CALL_* ABI macros, derive one
 * from the object format: Mach-O hosts get _CALL_DARWIN, 32-bit ELF
 * hosts get _CALL_SYSV, anything else is rejected.
 *
 * NOTE(review): TCG_TARGET_REG_BITS is not defined in this fragment.
 * An identifier that is undefined inside #if evaluates to 0, so unless
 * the including file defines it the _CALL_SYSV branch below can never
 * be selected -- confirm against the file that includes this one.
 */
#if !defined(_CALL_SYSV) && \
!defined(_CALL_DARWIN) && \
!defined(_CALL_AIX) && \
!defined(_CALL_ELF)
# if defined(__APPLE__)
# define _CALL_DARWIN
# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
# define _CALL_SYSV
# else
# error "Unknown ABI"
# endif
#endif
/* The code below is only written for the _CALL_SYSV register conventions. */
#ifndef _CALL_SYSV
# error "Unsupported ABI"
#endif
.global safe_syscall_base
.global safe_syscall_start
.global safe_syscall_end
.type safe_syscall_base, @function
.text
/*
* This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
*/
safe_syscall_base:
        .cfi_startproc
        /* Allocate an 8-byte frame and save r30, which is restored on exit. */
        stwu    1, -8(1)
        .cfi_def_cfa_offset 8
        stw     30, 4(1)
        .cfi_offset 30, -4

        /*
         * We enter with r3 == &signal_pending
         *               r4 == syscall number
         *               r5 ... r10 == syscall arguments
         *               and return the result in r3
         * and the syscall instruction needs
         *               r0 == syscall number
         *               r3 ... r8 == syscall arguments
         *               and returns the result in r3
         * Shuffle everything around appropriately.
         * r3 is copied to r30 first because it is overwritten by the
         * first syscall argument.
         */
        mr      30, 3           /* signal_pending */
        mr      0, 4            /* syscall number */
        mr      3, 5            /* syscall arguments */
        mr      4, 6
        mr      5, 7
        mr      6, 8
        mr      7, 9
        mr      8, 10

        /*
         * This next sequence of code works in conjunction with the
         * rewind_if_safe_syscall_function().  If a signal is taken
         * and the interrupted PC is anywhere between 'safe_syscall_start'
         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
         * The code sequence must therefore be able to cope with this, and
         * the syscall instruction must be the final one in the sequence.
         */
safe_syscall_start:
        /* if signal_pending is non-zero, don't do the call */
        lwz     12, 0(30)
        cmpwi   0, 12, 0
        bne-    2f
        sc
safe_syscall_end:
        /* code path when we did execute the syscall */
        lwz     30, 4(1)        /* restore r30 */
        addi    1, 1, 8         /* restore stack */
        /*
         * Remember the unwind state that describes the live frame
         * (CFA offset 8, r30 in its stack slot).  Label 2 below is
         * entered by the branch from safe_syscall_start, not by
         * fall-through, so that state must be reinstated there.
         */
        .cfi_remember_state
        .cfi_restore 30
        .cfi_def_cfa_offset 0
        bnslr+                  /* return on success */
        b       safe_syscall_set_errno_tail

        /* code path when we didn't execute the syscall */
        .cfi_restore_state
2:      lwz     30, 4(1)        /* restore r30 */
        addi    1, 1, 8         /* restore stack */
        .cfi_restore 30
        .cfi_def_cfa_offset 0
        addi    3, 0, QEMU_ERESTARTSYS
        b       safe_syscall_set_errno_tail

        .cfi_endproc
        .size   safe_syscall_base, .-safe_syscall_base

View File

@ -198,6 +198,8 @@ static void initialize_debug_host(CPUDebug *s)
s->info.cap_insn_split = 6; s->info.cap_insn_split = 6;
#elif defined(__hppa__) #elif defined(__hppa__)
s->info.print_insn = print_insn_hppa; s->info.print_insn = print_insn_hppa;
#elif defined(__loongarch__)
s->info.print_insn = print_insn_loongarch;
#endif #endif
} }

View File

@ -6,34 +6,49 @@
#include "exec/helper-head.h" #include "exec/helper-head.h"
/*
* Work around an issue with --enable-lto, in which GCC's ipa-split pass
* decides to split out the noreturn code paths that raise an exception,
* taking the __builtin_return_address() along into the new function,
* where it no longer computes a value that returns to TCG generated code.
* Despite the name, the noinline attribute affects the splitter, so this
* prevents the optimization in question. Given that helpers should not
* otherwise be called directly, this should not have any other visible effect.
*
* See https://gitlab.com/qemu-project/qemu/-/issues/1454
*/
#define DEF_HELPER_ATTR __attribute__((noinline))
#define DEF_HELPER_FLAGS_0(name, flags, ret) \ #define DEF_HELPER_FLAGS_0(name, flags, ret) \
dh_ctype(ret) HELPER(name) (void); dh_ctype(ret) HELPER(name) (void) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1)); dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)); dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ #define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3)); dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), \
dh_ctype(t3)) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ #define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
dh_ctype(t4)); dh_ctype(t4)) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \ #define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
dh_ctype(t4), dh_ctype(t5)); dh_ctype(t4), dh_ctype(t5)) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \ #define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
dh_ctype(t4), dh_ctype(t5), dh_ctype(t6)); dh_ctype(t4), dh_ctype(t5), \
dh_ctype(t6)) DEF_HELPER_ATTR;
#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \ #define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \ dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
dh_ctype(t7)); dh_ctype(t7)) DEF_HELPER_ATTR;
#define IN_HELPER_PROTO #define IN_HELPER_PROTO
@ -51,5 +66,6 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
#undef DEF_HELPER_FLAGS_5 #undef DEF_HELPER_FLAGS_5
#undef DEF_HELPER_FLAGS_6 #undef DEF_HELPER_FLAGS_6
#undef DEF_HELPER_FLAGS_7 #undef DEF_HELPER_FLAGS_7
#undef DEF_HELPER_ATTR
#endif /* HELPER_PROTO_H */ #endif /* HELPER_PROTO_H */

View File

@ -155,13 +155,6 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rem_i64 0
#endif #endif
/* For 32-bit targets, some sort of unsigned widening multiply is required. */
#if TCG_TARGET_REG_BITS == 32 \
&& !(defined(TCG_TARGET_HAS_mulu2_i32) \
|| defined(TCG_TARGET_HAS_muluh_i32))
# error "Missing unsigned widening multiply"
#endif
#if !defined(TCG_TARGET_HAS_v64) \ #if !defined(TCG_TARGET_HAS_v64) \
&& !defined(TCG_TARGET_HAS_v128) \ && !defined(TCG_TARGET_HAS_v128) \
&& !defined(TCG_TARGET_HAS_v256) && !defined(TCG_TARGET_HAS_v256)

View File

@ -0,0 +1,39 @@
/*
* host-signal.h: signal info dependent on the host architecture
*
* Copyright (c) 2022 Linaro Ltd.
*
* This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef PPC_HOST_SIGNAL_H
#define PPC_HOST_SIGNAL_H
#include <asm/ptrace.h>
/* The third argument to a SA_SIGINFO handler is ucontext_t. */
typedef ucontext_t host_sigcontext;
/*
 * Return the 'nip' field of the register block referenced by the signal
 * context @uc (presumably the address at which the signal was taken --
 * inferred from the function name; confirm against the kernel's pt_regs).
 */
static inline uintptr_t host_signal_pc(host_sigcontext *uc)
{
return uc->uc_mcontext.regs->nip;
}
/*
 * Overwrite the 'nip' field of the register block referenced by the
 * signal context @uc with @pc.
 */
static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc)
{
uc->uc_mcontext.regs->nip = pc;
}
/*
 * Return a pointer to the uc_sigmask member of @uc.  The pointer refers
 * into @uc itself, so it is only valid for as long as @uc is.
 */
static inline void *host_signal_mask(host_sigcontext *uc)
{
return &uc->uc_sigmask;
}
/*
 * Report whether the fault described by @uc was caused by a write.
 *
 * Returns false when the saved trap number is 0x400, otherwise returns
 * whether bit 0x02000000 of the saved DSISR is set.  @info is unused.
 *
 * NOTE(review): 0x400 is presumably the instruction-storage trap and
 * 0x02000000 the DSISR "store" bit -- confirm against the Power ISA.
 */
static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc)
{
    if (uc->uc_mcontext.regs->trap == 0x400) {
        return false;
    }
    return (uc->uc_mcontext.regs->dsisr & 0x02000000) != 0;
}
#endif

View File

@ -519,10 +519,6 @@ INSN(fsel, fffc)
INSN(addu16i_d, rr_i) INSN(addu16i_d, rr_i)
INSN(lu12i_w, r_i) INSN(lu12i_w, r_i)
INSN(lu32i_d, r_i) INSN(lu32i_d, r_i)
INSN(pcaddi, r_i)
INSN(pcalau12i, r_i)
INSN(pcaddu12i, r_i)
INSN(pcaddu18i, r_i)
INSN(ll_w, rr_i) INSN(ll_w, rr_i)
INSN(sc_w, rr_i) INSN(sc_w, rr_i)
INSN(ll_d, rr_i) INSN(ll_d, rr_i)
@ -628,7 +624,7 @@ INSN(beqz, r_offs)
INSN(bnez, r_offs) INSN(bnez, r_offs)
INSN(bceqz, c_offs) INSN(bceqz, c_offs)
INSN(bcnez, c_offs) INSN(bcnez, c_offs)
INSN(jirl, rr_offs) INSN(jirl, rr_i)
INSN(b, offs) INSN(b, offs)
INSN(bl, offs) INSN(bl, offs)
INSN(beq, rr_offs) INSN(beq, rr_offs)
@ -755,3 +751,36 @@ static bool trans_fcmp_cond_##suffix(DisasContext *ctx, \
FCMP_INSN(s) FCMP_INSN(s)
FCMP_INSN(d) FCMP_INSN(d)
/*
 * Define trans_<name>() for a pc-relative add instruction.
 *
 * The generated function prints the mnemonic, the destination register
 * and the raw immediate, followed by a "# 0x..." annotation holding the
 * value returned by gen_<name>(ctx->pc, a->imm).  It always returns true.
 * A matching gen_<name>() must be declared before the macro is expanded.
 */
#define PCADD_INSN(name) \
static bool trans_##name(DisasContext *ctx, arg_##name *a) \
{ \
output(ctx, #name, "r%d, %d # 0x%" PRIx64, \
a->rd, a->imm, gen_##name(ctx->pc, a->imm)); \
return true; \
}
/*
 * Compute the value annotated for pcaddi: pc + (imm << 2).
 *
 * @imm may be negative.  It is converted to uint64_t before shifting so
 * that the shift is performed on an unsigned value; left-shifting a
 * negative int is undefined in ISO C.  The conversion is modular, so the
 * sum wraps exactly as two's-complement arithmetic would.
 */
static uint64_t gen_pcaddi(uint64_t pc, int imm)
{
    return pc + ((uint64_t)imm << 2);
}
/*
 * Compute the value annotated for pcalau12i: (pc + (imm << 12)) with the
 * low 12 bits cleared.
 *
 * @imm may be negative.  It is converted to uint64_t before shifting so
 * that the shift is performed on an unsigned value; left-shifting a
 * negative int is undefined in ISO C.  The mask is likewise built as a
 * uint64_t so that no implicit int-to-unsigned conversion is involved.
 */
static uint64_t gen_pcalau12i(uint64_t pc, int imm)
{
    return (pc + ((uint64_t)imm << 12)) & ~(uint64_t)0xfff;
}
/*
 * Compute the value annotated for pcaddu12i: pc + (imm << 12).
 *
 * @imm may be negative.  It is converted to uint64_t before shifting so
 * that the shift is performed on an unsigned value; left-shifting a
 * negative int is undefined in ISO C.  The conversion is modular, so the
 * sum wraps exactly as two's-complement arithmetic would.
 */
static uint64_t gen_pcaddu12i(uint64_t pc, int imm)
{
    return pc + ((uint64_t)imm << 12);
}
/*
 * Compute the value annotated for pcaddu18i: pc + (imm << 18).
 * @imm is widened to uint64_t before the shift, so negative immediates
 * wrap modulo 2^64.
 */
static uint64_t gen_pcaddu18i(uint64_t pc, int imm)
{
    const uint64_t offset = (uint64_t)imm << 18;

    return pc + offset;
}
PCADD_INSN(pcaddi)
PCADD_INSN(pcalau12i)
PCADD_INSN(pcaddu12i)
PCADD_INSN(pcaddu18i)

View File

@ -23,7 +23,7 @@ static bool trans_jirl(DisasContext *ctx, arg_jirl *a)
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
tcg_gen_addi_tl(cpu_pc, src1, a->offs); tcg_gen_addi_tl(cpu_pc, src1, a->imm);
tcg_gen_movi_tl(dest, ctx->base.pc_next + 4); tcg_gen_movi_tl(dest, ctx->base.pc_next + 4);
gen_set_gpr(a->rd, dest, EXT_NONE); gen_set_gpr(a->rd, dest, EXT_NONE);
tcg_gen_lookup_and_goto_ptr(); tcg_gen_lookup_and_goto_ptr();

View File

@ -67,6 +67,7 @@
@rr_ui12 .... ...... imm:12 rj:5 rd:5 &rr_i @rr_ui12 .... ...... imm:12 rj:5 rd:5 &rr_i
@rr_i14s2 .... .... .............. rj:5 rd:5 &rr_i imm=%i14s2 @rr_i14s2 .... .... .............. rj:5 rd:5 &rr_i imm=%i14s2
@rr_i16 .... .. imm:s16 rj:5 rd:5 &rr_i @rr_i16 .... .. imm:s16 rj:5 rd:5 &rr_i
@rr_i16s2 .... .. ................ rj:5 rd:5 &rr_i imm=%offs16
@hint_r_i12 .... ...... imm:s12 rj:5 hint:5 &hint_r_i @hint_r_i12 .... ...... imm:s12 rj:5 hint:5 &hint_r_i
@rrr_sa2p1 .... ........ ... .. rk:5 rj:5 rd:5 &rrr_sa sa=%sa2p1 @rrr_sa2p1 .... ........ ... .. rk:5 rj:5 rd:5 &rrr_sa sa=%sa2p1
@rrr_sa2 .... ........ ... sa:2 rk:5 rj:5 rd:5 &rrr_sa @rrr_sa2 .... ........ ... sa:2 rk:5 rj:5 rd:5 &rrr_sa
@ -444,7 +445,7 @@ beqz 0100 00 ................ ..... ..... @r_offs21
bnez 0100 01 ................ ..... ..... @r_offs21 bnez 0100 01 ................ ..... ..... @r_offs21
bceqz 0100 10 ................ 00 ... ..... @c_offs21 bceqz 0100 10 ................ 00 ... ..... @c_offs21
bcnez 0100 10 ................ 01 ... ..... @c_offs21 bcnez 0100 10 ................ 01 ... ..... @c_offs21
jirl 0100 11 ................ ..... ..... @rr_offs16 jirl 0100 11 ................ ..... ..... @rr_i16s2
b 0101 00 .......................... @offs26 b 0101 00 .......................... @offs26
bl 0101 01 .......................... @offs26 bl 0101 01 .......................... @offs26
beq 0101 10 ................ ..... ..... @rr_offs16 beq 0101 10 ................ ..... ..... @rr_offs16

View File

@ -3,7 +3,6 @@ gen = decodetree.process('insns.decode')
loongarch_ss = ss.source_set() loongarch_ss = ss.source_set()
loongarch_ss.add(files( loongarch_ss.add(files(
'cpu.c', 'cpu.c',
'disas.c',
)) ))
loongarch_tcg_ss = ss.source_set() loongarch_tcg_ss = ss.source_set()
loongarch_tcg_ss.add(gen) loongarch_tcg_ss.add(gen)
@ -24,6 +23,8 @@ loongarch_softmmu_ss.add(files(
'iocsr_helper.c', 'iocsr_helper.c',
)) ))
common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen])
loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss])
target_arch += {'loongarch': loongarch_ss} target_arch += {'loongarch': loongarch_ss}

View File

@ -15,8 +15,9 @@ C_O0_I2(r, rIN)
C_O0_I2(s, s) C_O0_I2(s, s)
C_O0_I2(w, r) C_O0_I2(w, r)
C_O0_I3(s, s, s) C_O0_I3(s, s, s)
C_O0_I3(S, p, s)
C_O0_I4(r, r, rI, rI) C_O0_I4(r, r, rI, rI)
C_O0_I4(s, s, s, s) C_O0_I4(S, p, s, s)
C_O1_I1(r, l) C_O1_I1(r, l)
C_O1_I1(r, r) C_O1_I1(r, r)
C_O1_I1(w, r) C_O1_I1(w, r)
@ -38,8 +39,8 @@ C_O1_I2(w, w, wZ)
C_O1_I3(w, w, w, w) C_O1_I3(w, w, w, w)
C_O1_I4(r, r, r, rI, rI) C_O1_I4(r, r, r, rI, rI)
C_O1_I4(r, r, rIN, rIK, 0) C_O1_I4(r, r, rIN, rIK, 0)
C_O2_I1(r, r, l) C_O2_I1(e, p, l)
C_O2_I2(r, r, l, l) C_O2_I2(e, p, l, l)
C_O2_I2(r, r, r, r) C_O2_I2(r, r, r, r)
C_O2_I4(r, r, r, r, rIN, rIK) C_O2_I4(r, r, r, r, rIN, rIK)
C_O2_I4(r, r, rI, rI, rIN, rIK) C_O2_I4(r, r, rI, rI, rIN, rIK)

View File

@ -8,9 +8,11 @@
* Define constraint letters for register sets: * Define constraint letters for register sets:
* REGS(letter, register_mask) * REGS(letter, register_mask)
*/ */
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
REGS('r', ALL_GENERAL_REGS) REGS('r', ALL_GENERAL_REGS)
REGS('l', ALL_QLOAD_REGS) REGS('l', ALL_QLOAD_REGS)
REGS('s', ALL_QSTORE_REGS) REGS('s', ALL_QSTORE_REGS)
REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
REGS('w', ALL_VECTOR_REGS) REGS('w', ALL_VECTOR_REGS)
/* /*

View File

@ -1694,9 +1694,11 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
break; break;
case MO_UQ: case MO_UQ:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* LDRD requires alignment; double-check that. */ /* LDRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64 if (get_alignment_bits(opc) >= MO_64) {
&& (datalo & 1) == 0 && datahi == datalo + 1) {
/* /*
* Rm (the second address op) must not overlap Rt or Rt + 1. * Rm (the second address op) must not overlap Rt or Rt + 1.
* Since datalo is aligned, we can simplify the test via alignment. * Since datalo is aligned, we can simplify the test via alignment.
@ -1750,9 +1752,11 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
break; break;
case MO_UQ: case MO_UQ:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* LDRD requires alignment; double-check that. */ /* LDRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64 if (get_alignment_bits(opc) >= MO_64) {
&& (datalo & 1) == 0 && datahi == datalo + 1) {
tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
} else if (datalo == addrlo) { } else if (datalo == addrlo) {
tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4); tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
@ -1834,9 +1838,11 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
tcg_out_st32_r(s, cond, datalo, addrlo, addend); tcg_out_st32_r(s, cond, datalo, addrlo, addend);
break; break;
case MO_64: case MO_64:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* STRD requires alignment; double-check that. */ /* STRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64 if (get_alignment_bits(opc) >= MO_64) {
&& (datalo & 1) == 0 && datahi == datalo + 1) {
tcg_out_strd_r(s, cond, datalo, addrlo, addend); tcg_out_strd_r(s, cond, datalo, addrlo, addend);
} else if (scratch_addend) { } else if (scratch_addend) {
tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
@ -1871,9 +1877,11 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
break; break;
case MO_64: case MO_64:
/* We used pair allocation for datalo, so already should be aligned. */
tcg_debug_assert((datalo & 1) == 0);
tcg_debug_assert(datahi == datalo + 1);
/* STRD requires alignment; double-check that. */ /* STRD requires alignment; double-check that. */
if (get_alignment_bits(opc) >= MO_64 if (get_alignment_bits(opc) >= MO_64) {
&& (datalo & 1) == 0 && datahi == datalo + 1) {
tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
} else { } else {
tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
@ -2372,11 +2380,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i32:
return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l); return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_ld_i64:
return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l); return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:
return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s); return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s); return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
case INDEX_op_st_vec: case INDEX_op_st_vec:
return C_O0_I2(w, r); return C_O0_I2(w, r);

View File

@ -4,7 +4,7 @@
* *
* This file is auto-generated by genqemutcgdefs from * This file is auto-generated by genqemutcgdefs from
* https://github.com/loongson-community/loongarch-opcodes, * https://github.com/loongson-community/loongarch-opcodes,
* from commit 961f0c60f5b63e574d785995600c71ad5413fdc4. * from commit 25ca7effe9d88101c1cf96c4005423643386d81f.
* DO NOT EDIT. * DO NOT EDIT.
*/ */
@ -74,6 +74,7 @@ typedef enum {
OPC_ANDI = 0x03400000, OPC_ANDI = 0x03400000,
OPC_ORI = 0x03800000, OPC_ORI = 0x03800000,
OPC_XORI = 0x03c00000, OPC_XORI = 0x03c00000,
OPC_ADDU16I_D = 0x10000000,
OPC_LU12I_W = 0x14000000, OPC_LU12I_W = 0x14000000,
OPC_CU32I_D = 0x16000000, OPC_CU32I_D = 0x16000000,
OPC_PCADDU2I = 0x18000000, OPC_PCADDU2I = 0x18000000,
@ -710,6 +711,13 @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12)
tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12)); tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12));
} }
/* Emits the `addu16i.d d, j, sk16` instruction. */
static void __attribute__((unused))
tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
{
tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16));
}
/* Emits the `lu12i.w d, sj20` instruction. */ /* Emits the `lu12i.w d, sj20` instruction. */
static void __attribute__((unused)) static void __attribute__((unused))
tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20) tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20)

View File

@ -23,9 +23,12 @@ C_O1_I1(r, L)
C_O1_I2(r, r, rC) C_O1_I2(r, r, rC)
C_O1_I2(r, r, ri) C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI) C_O1_I2(r, r, rI)
C_O1_I2(r, r, rJ)
C_O1_I2(r, r, rU) C_O1_I2(r, r, rU)
C_O1_I2(r, r, rW) C_O1_I2(r, r, rW)
C_O1_I2(r, r, rZ) C_O1_I2(r, r, rZ)
C_O1_I2(r, 0, rZ) C_O1_I2(r, 0, rZ)
C_O1_I2(r, rZ, rN) C_O1_I2(r, rZ, ri)
C_O1_I2(r, rZ, rJ)
C_O1_I2(r, rZ, rZ) C_O1_I2(r, rZ, rZ)
C_O1_I4(r, rZ, rJ, rZ, rZ)

View File

@ -21,7 +21,7 @@ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
* CONST(letter, TCG_CT_CONST_* bit set) * CONST(letter, TCG_CT_CONST_* bit set)
*/ */
CONST('I', TCG_CT_CONST_S12) CONST('I', TCG_CT_CONST_S12)
CONST('N', TCG_CT_CONST_N12) CONST('J', TCG_CT_CONST_S32)
CONST('U', TCG_CT_CONST_U12) CONST('U', TCG_CT_CONST_U12)
CONST('Z', TCG_CT_CONST_ZERO) CONST('Z', TCG_CT_CONST_ZERO)
CONST('C', TCG_CT_CONST_C12) CONST('C', TCG_CT_CONST_C12)

View File

@ -126,7 +126,7 @@ static const int tcg_target_call_oarg_regs[] = {
#define TCG_CT_CONST_ZERO 0x100 #define TCG_CT_CONST_ZERO 0x100
#define TCG_CT_CONST_S12 0x200 #define TCG_CT_CONST_S12 0x200
#define TCG_CT_CONST_N12 0x400 #define TCG_CT_CONST_S32 0x400
#define TCG_CT_CONST_U12 0x800 #define TCG_CT_CONST_U12 0x800
#define TCG_CT_CONST_C12 0x1000 #define TCG_CT_CONST_C12 0x1000
#define TCG_CT_CONST_WSZ 0x2000 #define TCG_CT_CONST_WSZ 0x2000
@ -161,7 +161,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
return true; return true;
} }
if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
return true; return true;
} }
if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
@ -274,16 +274,6 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
return true; return true;
} }
static bool imm_part_needs_loading(bool high_bits_are_ones,
tcg_target_long part)
{
if (high_bits_are_ones) {
return part != -1;
} else {
return part != 0;
}
}
/* Loads a 32-bit immediate into rd, sign-extended. */ /* Loads a 32-bit immediate into rd, sign-extended. */
static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
{ {
@ -291,16 +281,16 @@ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
tcg_target_long hi12 = sextreg(val, 12, 20); tcg_target_long hi12 = sextreg(val, 12, 20);
/* Single-instruction cases. */ /* Single-instruction cases. */
if (lo == val) { if (hi12 == 0) {
/* val fits in simm12: addi.w rd, zero, val */
tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
return;
}
if (0x800 <= val && val <= 0xfff) {
/* val fits in uimm12: ori rd, zero, val */ /* val fits in uimm12: ori rd, zero, val */
tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
return; return;
} }
if (hi12 == sextreg(lo, 12, 20)) {
/* val fits in simm12: addi.w rd, zero, val */
tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
return;
}
/* High bits must be set; load with lu12i.w + optional ori. */ /* High bits must be set; load with lu12i.w + optional ori. */
tcg_out_opc_lu12i_w(s, rd, hi12); tcg_out_opc_lu12i_w(s, rd, hi12);
@ -334,8 +324,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
intptr_t pc_offset; intptr_t pc_offset;
tcg_target_long val_lo, val_hi, pc_hi, offset_hi; tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
tcg_target_long hi32, hi52; tcg_target_long hi12, hi32, hi52;
bool rd_high_bits_are_ones;
/* Value fits in signed i32. */ /* Value fits in signed i32. */
if (type == TCG_TYPE_I32 || val == (int32_t)val) { if (type == TCG_TYPE_I32 || val == (int32_t)val) {
@ -366,29 +355,68 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
return; return;
} }
hi12 = sextreg(val, 12, 20);
hi32 = sextreg(val, 32, 20); hi32 = sextreg(val, 32, 20);
hi52 = sextreg(val, 52, 12); hi52 = sextreg(val, 52, 12);
/* Single cu52i.d case. */ /* Single cu52i.d case. */
if (ctz64(val) >= 52) { if ((hi52 != 0) && (ctz64(val) >= 52)) {
tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52);
return; return;
} }
/* Slow path. Initialize the low 32 bits, then concat high bits. */ /* Slow path. Initialize the low 32 bits, then concat high bits. */
tcg_out_movi_i32(s, rd, val); tcg_out_movi_i32(s, rd, val);
rd_high_bits_are_ones = (int32_t)val < 0;
if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) { /* Load hi32 and hi52 explicitly when they are unexpected values. */
if (hi32 != sextreg(hi12, 20, 20)) {
tcg_out_opc_cu32i_d(s, rd, hi32); tcg_out_opc_cu32i_d(s, rd, hi32);
rd_high_bits_are_ones = hi32 < 0;
} }
if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) { if (hi52 != sextreg(hi32, 20, 12)) {
tcg_out_opc_cu52i_d(s, rd, rd, hi52); tcg_out_opc_cu52i_d(s, rd, rd, hi52);
} }
} }
/*
 * Emit code that adds the constant @imm to @rs and places the sum in @rd.
 *
 * @type selects between the 32-bit (.w) and 64-bit (.d) add instructions.
 * When @imm cannot be split into a 16-bit high part and a 12-bit low part
 * the constant is first materialized in TCG_REG_TMP0, which is therefore
 * clobbered on that path.
 */
static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
                         TCGReg rs, tcg_target_long imm)
{
    tcg_target_long lo12 = sextreg(imm, 0, 12);
    tcg_target_long hi16 = sextreg(imm - lo12, 16, 16);
    bool is_i32 = (type == TCG_TYPE_I32);

    /*
     * Note that there's a hole in between hi16 and lo12:
     *
     *       3                   2                   1                   0
     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
     * ...+-------------------------------+-------+-----------------------+
     *    |             hi16              |       |         lo12          |
     * ...+-------------------------------+-------+-----------------------+
     *
     * For bits within that hole, it's more efficient to use LU12I and ADD.
     */
    if (imm != (hi16 << 16) + lo12) {
        /* Some bit falls in the hole: load the constant, then add. */
        tcg_out_movi(s, type, TCG_REG_TMP0, imm);
        if (is_i32) {
            tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0);
        } else {
            tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0);
        }
        return;
    }

    /* Add the high part first; the low part then accumulates onto rd. */
    if (hi16 != 0) {
        tcg_out_opc_addu16i_d(s, rd, rs, hi16);
        rs = rd;
    }

    if (is_i32) {
        /* Always emitted for 32-bit adds, even when lo12 is zero. */
        tcg_out_opc_addi_w(s, rd, rs, lo12);
    } else if (lo12 != 0) {
        tcg_out_opc_addi_d(s, rd, rs, lo12);
    } else {
        /* Nothing left to add: just get the value into rd. */
        tcg_out_mov(s, type, rd, rs);
    }
}
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
{ {
tcg_out_opc_andi(s, ret, arg, 0xff); tcg_out_opc_andi(s, ret, arg, 0xff);
@ -441,64 +469,155 @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
} }
static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, #define SETCOND_INV TCG_TARGET_NB_REGS
TCGReg arg1, TCGReg arg2, bool c2) #define SETCOND_NEZ (SETCOND_INV << 1)
{ #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ)
TCGReg tmp;
if (c2) { static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
tcg_debug_assert(arg2 == 0); TCGReg arg1, tcg_target_long arg2, bool c2)
{
int flags = 0;
switch (cond) {
case TCG_COND_EQ: /* -> NE */
case TCG_COND_GE: /* -> LT */
case TCG_COND_GEU: /* -> LTU */
case TCG_COND_GT: /* -> LE */
case TCG_COND_GTU: /* -> LEU */
cond = tcg_invert_cond(cond);
flags ^= SETCOND_INV;
break;
default:
break;
} }
switch (cond) { switch (cond) {
case TCG_COND_EQ:
if (c2) {
tmp = arg1;
} else {
tcg_out_opc_sub_d(s, ret, arg1, arg2);
tmp = ret;
}
tcg_out_opc_sltui(s, ret, tmp, 1);
break;
case TCG_COND_NE:
if (c2) {
tmp = arg1;
} else {
tcg_out_opc_sub_d(s, ret, arg1, arg2);
tmp = ret;
}
tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
break;
case TCG_COND_LT:
tcg_out_opc_slt(s, ret, arg1, arg2);
break;
case TCG_COND_GE:
tcg_out_opc_slt(s, ret, arg1, arg2);
tcg_out_opc_xori(s, ret, ret, 1);
break;
case TCG_COND_LE: case TCG_COND_LE:
tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false);
break;
case TCG_COND_GT:
tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false);
break;
case TCG_COND_LTU:
tcg_out_opc_sltu(s, ret, arg1, arg2);
break;
case TCG_COND_GEU:
tcg_out_opc_sltu(s, ret, arg1, arg2);
tcg_out_opc_xori(s, ret, ret, 1);
break;
case TCG_COND_LEU: case TCG_COND_LEU:
tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false); /*
* If we have a constant input, the most efficient way to implement
* LE is by adding 1 and using LT. Watch out for wrap around for LEU.
* We don't need to care for this for LE because the constant input
* is still constrained to int32_t, and INT32_MAX+1 is representable
* in the 64-bit temporary register.
*/
if (c2) {
if (cond == TCG_COND_LEU) {
/* unsigned <= -1 is true */
if (arg2 == -1) {
tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
return ret;
}
cond = TCG_COND_LTU;
} else {
cond = TCG_COND_LT;
}
arg2 += 1;
} else {
TCGReg tmp = arg2;
arg2 = arg1;
arg1 = tmp;
cond = tcg_swap_cond(cond); /* LE -> GE */
cond = tcg_invert_cond(cond); /* GE -> LT */
flags ^= SETCOND_INV;
}
break; break;
case TCG_COND_GTU: default:
tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false);
break; break;
}
switch (cond) {
case TCG_COND_NE:
flags |= SETCOND_NEZ;
if (!c2) {
tcg_out_opc_xor(s, ret, arg1, arg2);
} else if (arg2 == 0) {
ret = arg1;
} else if (arg2 >= 0 && arg2 <= 0xfff) {
tcg_out_opc_xori(s, ret, arg1, arg2);
} else {
tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2);
}
break;
case TCG_COND_LT:
case TCG_COND_LTU:
if (c2) {
if (arg2 >= -0x800 && arg2 <= 0x7ff) {
if (cond == TCG_COND_LT) {
tcg_out_opc_slti(s, ret, arg1, arg2);
} else {
tcg_out_opc_sltui(s, ret, arg1, arg2);
}
break;
}
tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
arg2 = TCG_REG_TMP0;
}
if (cond == TCG_COND_LT) {
tcg_out_opc_slt(s, ret, arg1, arg2);
} else {
tcg_out_opc_sltu(s, ret, arg1, arg2);
}
break;
default: default:
g_assert_not_reached(); g_assert_not_reached();
break; break;
} }
return ret | flags;
}
/*
 * Emit code computing the comparison "arg1 <cond> arg2" into @ret.
 *
 * The comparison itself is produced by tcg_out_setcond_int(), whose return
 * value packs a register number together with SETCOND_* flag bits.  Unless
 * that packed value is exactly @ret, one more instruction is emitted here
 * to turn the intermediate value into the final result in @ret.
 *
 * @c2 is passed through to tcg_out_setcond_int() unchanged, alongside @arg2.
 */
static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int packed = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
    int fixup = packed & SETCOND_FLAGS;
    TCGReg src = packed & ~SETCOND_FLAGS;

    /* The helper's answer is already exactly @ret: no fix-up to emit. */
    if (packed == ret) {
        return;
    }

    if (fixup == SETCOND_INV) {
        /* Intermediate value is a boolean: flip it. */
        tcg_out_opc_xori(s, ret, src, 1);
    } else if (fixup == SETCOND_NEZ) {
        /* Intermediate value is zero/non-zero: ret = (0 <u src). */
        tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, src);
    } else if (fixup == (SETCOND_NEZ | SETCOND_INV)) {
        /* Intermediate value is zero/non-zero: ret = (src <u 1). */
        tcg_out_opc_sltui(s, ret, src, 1);
    } else {
        g_assert_not_reached();
    }
}
/*
 * Emit code for a conditional move: ret = (c1 <cond> c2) ? v1 : v2.
 *
 * The comparison is generated by tcg_out_setcond_int() with TCG_REG_TMP0
 * passed as its destination register.  Its return value packs the register
 * holding the intermediate result together with SETCOND_* flag bits.
 * Only SETCOND_INV is examined here.
 *
 * const2 is forwarded to tcg_out_setcond_int() together with c2.
 * TCG_REG_TMP1 and TCG_REG_TMP2 are written in the general case.
 */
static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
TCGReg c1, tcg_target_long c2, bool const2,
TCGReg v1, TCGReg v2)
{
int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2);
TCGReg t;
/*
 * Standardize the test below to t != 0: if the intermediate result is
 * inverted, exchange the two value operands instead of emitting an
 * instruction to invert it.
 */
if (tmpflags & SETCOND_INV) {
t = v1, v1 = v2, v2 = t;
}
/* Strip the flag bits to recover the register holding the test value. */
t = tmpflags & ~SETCOND_FLAGS;
if (v1 == TCG_REG_ZERO) {
/* The "true" value is the zero register: a single masknez of v2 suffices. */
tcg_out_opc_masknez(s, ret, v2, t);
} else if (v2 == TCG_REG_ZERO) {
/* The "false" value is the zero register: a single maskeqz of v1 suffices. */
tcg_out_opc_maskeqz(s, ret, v1, t);
} else {
/* General case: mask each value into a temporary, then OR the two halves. */
tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */
tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */
tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2);
}
} }
/* /*
@ -583,7 +702,7 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
intptr_t imm12 = sextreg(offset, 0, 12); intptr_t imm12 = sextreg(offset, 0, 12);
if (offset != imm12) { if (offset != imm12) {
intptr_t diff = offset - (uintptr_t)s->code_ptr; intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
imm12 = sextreg(diff, 0, 12); imm12 = sextreg(diff, 0, 12);
@ -1032,37 +1151,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
#endif #endif
} }
/* LoongArch uses `andi zero, zero, 0` as NOP. */
#define NOP OPC_ANDI
/* Output the NOP instruction encoding (the NOP macro) through tcg_out32(). */
static void tcg_out_nop(TCGContext *s)
{
tcg_out32(s, NOP);
}
/*
 * Patch the two-instruction jump slot for exit @n of @tb.
 *
 * The destination is tb->jmp_target_addr[n]; its distance from @jmp_rx is
 * measured in 4-byte units.  If that distance fits in a signed 26-bit field
 * the slot becomes "B target; NOP".  Otherwise the distance is asserted to
 * fit in 36 signed bits and the slot becomes a PCADDU18I + JIRL pair through
 * TCG_REG_TMP0.
 *
 * Both instruction words are stored to @jmp_rw with one qatomic_set() of a
 * uint64_t (first instruction in the low 32 bits), after which
 * flush_idcache_range() is called for 8 bytes.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t target = tb->jmp_target_addr[n];
    ptrdiff_t insn_off = (ptrdiff_t)(target - jmp_rx) >> 2;
    tcg_insn_unit first, second;
    uint64_t both;

    if (insn_off != sextreg(insn_off, 0, 26)) {
        ptrdiff_t lo, hi;

        /* Out of direct-branch range: split into upper part + low 16 bits. */
        tcg_debug_assert(insn_off == sextreg(insn_off, 0, 36));
        lo = (int16_t)insn_off;
        hi = (insn_off - lo) >> 16;
        first = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, hi);
        second = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lo);
    } else {
        /* Within direct-branch range: branch, then pad with a NOP. */
        first = encode_sd10k16_insn(OPC_B, insn_off);
        second = NOP;
    }

    both = ((uint64_t)second << 32) | first;
    qatomic_set((uint64_t *)jmp_rw, both);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}
/* /*
* Entry-points * Entry-points
*/ */
@ -1083,22 +1171,43 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
static void tcg_out_goto_tb(TCGContext *s, int which) static void tcg_out_goto_tb(TCGContext *s, int which)
{ {
/* /*
* Ensure that patch area is 8-byte aligned so that an * Direct branch, or load indirect address, to be patched
* atomic write can be used to patch the target address. * by tb_target_set_jmp_target. Check indirect load offset
* in range early, regardless of direct branch distance,
* via assert within tcg_out_opc_pcaddu2i.
*/ */
if ((uintptr_t)s->code_ptr & 7) { uintptr_t i_addr = get_jmp_target_addr(s, which);
tcg_out_nop(s); intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr);
}
set_jmp_insn_offset(s, which); set_jmp_insn_offset(s, which);
/* tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2);
* actual branch destination will be patched by
* tb_target_set_jmp_target later /* Finish the load and indirect branch. */
*/ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0);
tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
set_jmp_reset_offset(s, which); set_jmp_reset_offset(s, which);
} }
/*
 * Patch the single instruction at the jump slot for exit @n of @tb.
 *
 * If the distance from @jmp_rx to tb->jmp_target_addr[n], in 4-byte units,
 * fits in a signed 26-bit field, the instruction becomes a direct B to the
 * destination.  Otherwise it becomes a PCADDU2I that forms, in TCG_REG_TMP0,
 * the address of the tb->jmp_target_addr[n] slot itself, for use by an
 * indirect branch.
 *
 * The instruction word is stored to @jmp_rw with qatomic_set(), after which
 * flush_idcache_range() is called for 4 bytes.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t target = tb->jmp_target_addr[n];
    ptrdiff_t branch_words = (ptrdiff_t)(target - jmp_rx) >> 2;
    tcg_insn_unit patched;

    if (branch_words != sextreg(branch_words, 0, 26)) {
        /* Too far for B: address the slot holding the destination instead. */
        uintptr_t slot_addr = (uintptr_t)&tb->jmp_target_addr[n];
        intptr_t slot_disp = slot_addr - jmp_rx;

        patched = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0,
                                    slot_disp >> 2);
    } else {
        /* Destination reachable with a direct branch. */
        patched = encode_sd10k16_insn(OPC_B, branch_words);
    }

    qatomic_set((tcg_insn_unit *)jmp_rw, patched);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}
static void tcg_out_op(TCGContext *s, TCGOpcode opc, static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS], const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS]) const int const_args[TCG_MAX_OP_ARGS])
@ -1361,14 +1470,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_add_i32: case INDEX_op_add_i32:
if (c2) { if (c2) {
tcg_out_opc_addi_w(s, a0, a1, a2); tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2);
} else { } else {
tcg_out_opc_add_w(s, a0, a1, a2); tcg_out_opc_add_w(s, a0, a1, a2);
} }
break; break;
case INDEX_op_add_i64: case INDEX_op_add_i64:
if (c2) { if (c2) {
tcg_out_opc_addi_d(s, a0, a1, a2); tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2);
} else { } else {
tcg_out_opc_add_d(s, a0, a1, a2); tcg_out_opc_add_d(s, a0, a1, a2);
} }
@ -1376,14 +1485,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sub_i32: case INDEX_op_sub_i32:
if (c2) { if (c2) {
tcg_out_opc_addi_w(s, a0, a1, -a2); tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2);
} else { } else {
tcg_out_opc_sub_w(s, a0, a1, a2); tcg_out_opc_sub_w(s, a0, a1, a2);
} }
break; break;
case INDEX_op_sub_i64: case INDEX_op_sub_i64:
if (c2) { if (c2) {
tcg_out_opc_addi_d(s, a0, a1, -a2); tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2);
} else { } else {
tcg_out_opc_sub_d(s, a0, a1, a2); tcg_out_opc_sub_d(s, a0, a1, a2);
} }
@ -1443,6 +1552,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond(s, args[3], a0, a1, a2, c2); tcg_out_setcond(s, args[3], a0, a1, a2, c2);
break; break;
case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]);
break;
case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i32:
case INDEX_op_ld8s_i64: case INDEX_op_ld8s_i64:
tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
@ -1597,8 +1711,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I2(r, r, ri); return C_O1_I2(r, r, ri);
case INDEX_op_add_i32: case INDEX_op_add_i32:
return C_O1_I2(r, r, ri);
case INDEX_op_add_i64: case INDEX_op_add_i64:
return C_O1_I2(r, r, rI); return C_O1_I2(r, r, rJ);
case INDEX_op_and_i32: case INDEX_op_and_i32:
case INDEX_op_and_i64: case INDEX_op_and_i64:
@ -1617,18 +1732,17 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_ctz_i64: case INDEX_op_ctz_i64:
return C_O1_I2(r, r, rW); return C_O1_I2(r, r, rW);
case INDEX_op_setcond_i32:
case INDEX_op_setcond_i64:
return C_O1_I2(r, r, rZ);
case INDEX_op_deposit_i32: case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64: case INDEX_op_deposit_i64:
/* Must deposit into the same register as input */ /* Must deposit into the same register as input */
return C_O1_I2(r, 0, rZ); return C_O1_I2(r, 0, rZ);
case INDEX_op_sub_i32: case INDEX_op_sub_i32:
case INDEX_op_setcond_i32:
return C_O1_I2(r, rZ, ri);
case INDEX_op_sub_i64: case INDEX_op_sub_i64:
return C_O1_I2(r, rZ, rN); case INDEX_op_setcond_i64:
return C_O1_I2(r, rZ, rJ);
case INDEX_op_mul_i32: case INDEX_op_mul_i32:
case INDEX_op_mul_i64: case INDEX_op_mul_i64:
@ -1646,6 +1760,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_remu_i64: case INDEX_op_remu_i64:
return C_O1_I2(r, rZ, rZ); return C_O1_I2(r, rZ, rZ);
case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
return C_O1_I4(r, rZ, rJ, rZ, rZ);
default: default:
g_assert_not_reached(); g_assert_not_reached();
} }

View File

@ -42,11 +42,8 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 32 #define TCG_TARGET_NB_REGS 32
/*
* PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
* signed offset, which is +/- 128 GiB.
*/
#define MAX_CODE_GEN_BUFFER_SIZE (128 * GiB)
typedef enum { typedef enum {
TCG_REG_ZERO, TCG_REG_ZERO,
@ -97,7 +94,7 @@ typedef enum {
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
/* optional instructions */ /* optional instructions */
#define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_rem_i32 1
#define TCG_TARGET_HAS_div2_i32 0 #define TCG_TARGET_HAS_div2_i32 0
@ -133,7 +130,7 @@ typedef enum {
#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0
/* 64-bit operations */ /* 64-bit operations */
#define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_rem_i64 1
#define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_div2_i64 0

View File

@ -874,7 +874,7 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
tcg_gen_mov_i32(rl, t); tcg_gen_mov_i32(rl, t);
tcg_temp_free_i32(t); tcg_temp_free_i32(t);
} else { } else if (TCG_TARGET_REG_BITS == 64) {
TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64();
tcg_gen_extu_i32_i64(t0, arg1); tcg_gen_extu_i32_i64(t0, arg1);
@ -883,6 +883,8 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_gen_extr_i64_i32(rl, rh, t0); tcg_gen_extr_i64_i32(rl, rh, t0);
tcg_temp_free_i64(t0); tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1); tcg_temp_free_i64(t1);
} else {
qemu_build_not_reached();
} }
} }