Merge tag 'pull-tcg-20230523-3' of https://gitlab.com/rth7680/qemu into staging

util: Host cpu detection for x86 and aa64
util: Use cpu detection for bufferiszero
migration: Use cpu detection for xbzrle
tcg: Replace and remove cpu_atomic_{ld,st}o*
host/include: Split qemu/atomic128.h
tcg: Remove DEBUG_DISAS
tcg: Remove USE_TCG_OPTIMIZATIONS

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRtbwAdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8xlgf7B/RnVG7u7Hjndr6h
# fH07ujjElAivs+H05S0GGbQYpSNlqVv8PzXT2olJTAe15ryb537dCkqxyKW53vgb
# pUWzZf9Zy8XfN48W5V91dSKQE3gm5wBlOM6LI85F8XrIQyjZqkHti+rw3GxsamNL
# 8n2euOR0vx/jculBRxvZUAJDzb/0shN583mC5+wX/KInCHiNmMC6sCggyd5bpFJZ
# 1wqWwrUCqJ0KAAYKd9WrIKt6QwAX3kUDiBQPa1g+psBjZ1CYQ4lqZZn9uYQ4hEtG
# yBnT0ER2LOBQaKXJ0BrdG5c/mUNX7WkLBDTb+QjGGkfPc/bHIirXqeFzuyrXahg8
# kY155w==
# =XH8Z
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 23 May 2023 06:57:20 PM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230523-3' of https://gitlab.com/rth7680/qemu: (28 commits)
  tcg: Remove USE_TCG_OPTIMIZATIONS
  tcg: Remove DEBUG_DISAS
  qemu/atomic128: Add runtime test for FEAT_LSE2
  qemu/atomic128: Improve cmpxchg fallback for atomic16_set
  tcg: Split out tcg/debug-assert.h
  accel/tcg: Correctly use atomic128.h in ldst_atomicity.c.inc
  qemu/atomic128: Split atomic16_read
  accel/tcg: Eliminate #if on HAVE_ATOMIC128 and HAVE_CMPXCHG128
  accel/tcg: Remove prot argument to atomic_mmu_lookup
  accel/tcg: Remove cpu_atomic_{ld,st}o_*_mmu
  target/s390x: Always use cpu_atomic_cmpxchgl_be_mmu in do_csst
  target/s390x: Use cpu_{ld,st}*_mmu in do_csst
  accel/tcg: Unify cpu_{ld,st}*_{be,le}_mmu
  target/s390x: Use tcg_gen_qemu_{ld,st}_i128 for LPQ, STPQ
  target/ppc: Use tcg_gen_qemu_{ld,st}_i128 for LQARX, LQ, STQ
  include/qemu: Move CONFIG_ATOMIC128_OPT handling to atomic128.h
  meson: Fix detect atomic128 support with optimization
  include/host: Split out atomic128-ldst.h
  include/host: Split out atomic128-cas.h
  util: Add cpuinfo-aarch64.c
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit 1c12355b31
Author: Richard Henderson <richard.henderson@linaro.org>
Date:   2023-05-23 18:57:46 -07:00

56 changed files with 1042 additions and 2073 deletions

@ -157,6 +157,9 @@ F: include/exec/helper*.h
F: include/sysemu/cpus.h F: include/sysemu/cpus.h
F: include/sysemu/tcg.h F: include/sysemu/tcg.h
F: include/hw/core/tcg-cpu-ops.h F: include/hw/core/tcg-cpu-ops.h
F: host/include/*/host/cpuinfo.h
F: util/cpuinfo-*.c
F: include/tcg/
FPU emulation FPU emulation
M: Aurelien Jarno <aurelien@aurel32.net> M: Aurelien Jarno <aurelien@aurel32.net>
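
The MAINTAINERS entries above cover the new host cpuinfo interface from "util: Host cpu detection for x86 and aa64". A minimal usage sketch, under the assumption that the flag is named CPUINFO_AVX2 (the exact flag set should be checked against host/include/*/host/cpuinfo.h): cpuinfo_init() fills a global feature bitmask, and callers test bits at run time instead of probing CPUID themselves.

    #include "host/cpuinfo.h"

    /* Sketch only: the CPUINFO_AVX2 flag name is an assumption, not quoted
     * from this diff. */
    static bool host_has_avx2(void)
    {
        /* cpuinfo_init() returns the feature mask; assumed cheap to call
         * again once the mask has been populated. */
        return (cpuinfo_init() & CPUINFO_AVX2) != 0;
    }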

@ -19,20 +19,6 @@ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
} }
#if HAVE_ATOMIC128
static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
}
static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
#endif
/* /*
* Atomic helpers callable from TCG. * Atomic helpers callable from TCG.
* These have a common interface and all defer to cpu_atomic_* * These have a common interface and all defer to cpu_atomic_*
@ -62,36 +48,16 @@ CMPXCHG_HELPER(cmpxchgo_le, Int128)
#undef CMPXCHG_HELPER #undef CMPXCHG_HELPER
Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, uint64_t addr, Int128 HELPER(nonatomic_cmpxchgo)(CPUArchState *env, uint64_t addr,
Int128 cmpv, Int128 newv, uint32_t oi) Int128 cmpv, Int128 newv, uint32_t oi)
{ {
#if TCG_TARGET_REG_BITS == 32 #if TCG_TARGET_REG_BITS == 32
uintptr_t ra = GETPC(); uintptr_t ra = GETPC();
Int128 oldv; Int128 oldv;
oldv = cpu_ld16_be_mmu(env, addr, oi, ra); oldv = cpu_ld16_mmu(env, addr, oi, ra);
if (int128_eq(oldv, cmpv)) { if (int128_eq(oldv, cmpv)) {
cpu_st16_be_mmu(env, addr, newv, oi, ra); cpu_st16_mmu(env, addr, newv, oi, ra);
} else {
/* Even with comparison failure, still need a write cycle. */
probe_write(env, addr, 16, get_mmuidx(oi), ra);
}
return oldv;
#else
g_assert_not_reached();
#endif
}
Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, uint64_t addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
uintptr_t ra = GETPC();
Int128 oldv;
oldv = cpu_ld16_le_mmu(env, addr, oi, ra);
if (int128_eq(oldv, cmpv)) {
cpu_st16_le_mmu(env, addr, newv, oi, ra);
} else { } else {
/* Even with comparison failure, still need a write cycle. */ /* Even with comparison failure, still need a write cycle. */
probe_write(env, addr, 16, get_mmuidx(oi), ra); probe_write(env, addr, 16, get_mmuidx(oi), ra);

@ -73,8 +73,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv, ABI_TYPE cmpv, ABI_TYPE newv,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
PAGE_READ | PAGE_WRITE, retaddr);
DATA_TYPE ret; DATA_TYPE ret;
#if DATA_SIZE == 16 #if DATA_SIZE == 16
@ -87,38 +86,11 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
return ret; return ret;
} }
#if DATA_SIZE >= 16 #if DATA_SIZE < 16
#if HAVE_ATOMIC128
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ, retaddr);
DATA_TYPE val;
val = atomic16_read(haddr);
ATOMIC_MMU_CLEANUP;
atomic_trace_ld_post(env, addr, oi);
return val;
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_WRITE, retaddr);
atomic16_set(haddr, val);
ATOMIC_MMU_CLEANUP;
atomic_trace_st_post(env, addr, oi);
}
#endif
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val, ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
PAGE_READ | PAGE_WRITE, retaddr);
DATA_TYPE ret; DATA_TYPE ret;
ret = qatomic_xchg__nocheck(haddr, val); ret = qatomic_xchg__nocheck(haddr, val);
@ -131,9 +103,8 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
{ \ { \
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ DATA_TYPE *haddr, ret; \
PAGE_READ | PAGE_WRITE, retaddr); \ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
DATA_TYPE ret; \
ret = qatomic_##X(haddr, val); \ ret = qatomic_##X(haddr, val); \
ATOMIC_MMU_CLEANUP; \ ATOMIC_MMU_CLEANUP; \
atomic_trace_rmw_post(env, addr, oi); \ atomic_trace_rmw_post(env, addr, oi); \
@ -163,9 +134,8 @@ GEN_ATOMIC_HELPER(xor_fetch)
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
{ \ { \
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ XDATA_TYPE *haddr, cmp, old, new, val = xval; \
PAGE_READ | PAGE_WRITE, retaddr); \ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
XDATA_TYPE cmp, old, new, val = xval; \
smp_mb(); \ smp_mb(); \
cmp = qatomic_read__nocheck(haddr); \ cmp = qatomic_read__nocheck(haddr); \
do { \ do { \
@ -188,7 +158,7 @@ GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new) GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
#undef GEN_ATOMIC_HELPER_FN #undef GEN_ATOMIC_HELPER_FN
#endif /* DATA SIZE >= 16 */ #endif /* DATA SIZE < 16 */
#undef END #undef END
@ -206,8 +176,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv, ABI_TYPE cmpv, ABI_TYPE newv,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
PAGE_READ | PAGE_WRITE, retaddr);
DATA_TYPE ret; DATA_TYPE ret;
#if DATA_SIZE == 16 #if DATA_SIZE == 16
@ -220,39 +189,11 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
return BSWAP(ret); return BSWAP(ret);
} }
#if DATA_SIZE >= 16 #if DATA_SIZE < 16
#if HAVE_ATOMIC128
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ, retaddr);
DATA_TYPE val;
val = atomic16_read(haddr);
ATOMIC_MMU_CLEANUP;
atomic_trace_ld_post(env, addr, oi);
return BSWAP(val);
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_WRITE, retaddr);
val = BSWAP(val);
atomic16_set(haddr, val);
ATOMIC_MMU_CLEANUP;
atomic_trace_st_post(env, addr, oi);
}
#endif
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val, ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
PAGE_READ | PAGE_WRITE, retaddr);
ABI_TYPE ret; ABI_TYPE ret;
ret = qatomic_xchg__nocheck(haddr, BSWAP(val)); ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
@ -265,9 +206,8 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
{ \ { \
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ DATA_TYPE *haddr, ret; \
PAGE_READ | PAGE_WRITE, retaddr); \ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
DATA_TYPE ret; \
ret = qatomic_##X(haddr, BSWAP(val)); \ ret = qatomic_##X(haddr, BSWAP(val)); \
ATOMIC_MMU_CLEANUP; \ ATOMIC_MMU_CLEANUP; \
atomic_trace_rmw_post(env, addr, oi); \ atomic_trace_rmw_post(env, addr, oi); \
@ -294,9 +234,8 @@ GEN_ATOMIC_HELPER(xor_fetch)
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
{ \ { \
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval; \
PAGE_READ | PAGE_WRITE, retaddr); \ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
XDATA_TYPE ldo, ldn, old, new, val = xval; \
smp_mb(); \ smp_mb(); \
ldn = qatomic_read__nocheck(haddr); \ ldn = qatomic_read__nocheck(haddr); \
do { \ do { \
@ -326,7 +265,7 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
#undef ADD #undef ADD
#undef GEN_ATOMIC_HELPER_FN #undef GEN_ATOMIC_HELPER_FN
#endif /* DATA_SIZE >= 16 */ #endif /* DATA_SIZE < 16 */
#undef END #undef END
#endif /* DATA_SIZE > 1 */ #endif /* DATA_SIZE > 1 */

@ -307,7 +307,6 @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
tb->flags, tb->cflags, lookup_symbol(pc)); tb->flags, tb->cflags, lookup_symbol(pc));
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
FILE *logfile = qemu_log_trylock(); FILE *logfile = qemu_log_trylock();
if (logfile) { if (logfile) {
@ -323,7 +322,6 @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
qemu_log_unlock(logfile); qemu_log_unlock(logfile);
} }
} }
#endif /* DEBUG_DISAS */
} }
} }

@ -1896,12 +1896,9 @@ static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
/* /*
* Probe for an atomic operation. Do not allow unaligned operations, * Probe for an atomic operation. Do not allow unaligned operations,
* or io operations to proceed. Return the host address. * or io operations to proceed. Return the host address.
*
* @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
*/ */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, int size, int prot, MemOpIdx oi, int size, uintptr_t retaddr)
uintptr_t retaddr)
{ {
uintptr_t mmu_idx = get_mmuidx(oi); uintptr_t mmu_idx = get_mmuidx(oi);
MemOp mop = get_memop(oi); MemOp mop = get_memop(oi);
@ -1937,54 +1934,37 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
tlbe = tlb_entry(env, mmu_idx, addr); tlbe = tlb_entry(env, mmu_idx, addr);
/* Check TLB entry and enforce page permissions. */ /* Check TLB entry and enforce page permissions. */
if (prot & PAGE_WRITE) { tlb_addr = tlb_addr_write(tlbe);
tlb_addr = tlb_addr_write(tlbe); if (!tlb_hit(tlb_addr, addr)) {
if (!tlb_hit(tlb_addr, addr)) { if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE, addr & TARGET_PAGE_MASK)) {
addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size,
tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
MMU_DATA_STORE, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr);
index = tlb_index(env, mmu_idx, addr); tlbe = tlb_entry(env, mmu_idx, addr);
tlbe = tlb_entry(env, mmu_idx, addr);
}
tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
}
if (prot & PAGE_READ) {
/*
* Let the guest notice RMW on a write-only page.
* We have just verified that the page is writable.
* Subpage lookups may have left TLB_INVALID_MASK set,
* but addr_read will only be -1 if PAGE_READ was unset.
*/
if (unlikely(tlbe->addr_read == -1)) {
tlb_fill(env_cpu(env), addr, size,
MMU_DATA_LOAD, mmu_idx, retaddr);
/*
* Since we don't support reads and writes to different
* addresses, and we do have the proper page loaded for
* write, this shouldn't ever return. But just in case,
* handle via stop-the-world.
*/
goto stop_the_world;
}
/* Collect TLB_WATCHPOINT for read. */
tlb_addr |= tlbe->addr_read;
}
} else /* if (prot & PAGE_READ) */ {
tlb_addr = tlbe->addr_read;
if (!tlb_hit(tlb_addr, addr)) {
if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
addr & TARGET_PAGE_MASK)) {
tlb_fill(env_cpu(env), addr, size,
MMU_DATA_LOAD, mmu_idx, retaddr);
index = tlb_index(env, mmu_idx, addr);
tlbe = tlb_entry(env, mmu_idx, addr);
}
tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK;
} }
tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
} }
/*
* Let the guest notice RMW on a write-only page.
* We have just verified that the page is writable.
* Subpage lookups may have left TLB_INVALID_MASK set,
* but addr_read will only be -1 if PAGE_READ was unset.
*/
if (unlikely(tlbe->addr_read == -1)) {
tlb_fill(env_cpu(env), addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
/*
* Since we don't support reads and writes to different
* addresses, and we do have the proper page loaded for
* write, this shouldn't ever return. But just in case,
* handle via stop-the-world.
*/
goto stop_the_world;
}
/* Collect TLB_WATCHPOINT for read. */
tlb_addr |= tlbe->addr_read;
/* Notice an IO access or a needs-MMU-lookup access */ /* Notice an IO access or a needs-MMU-lookup access */
if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) { if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
/* There's really nothing that can be done to /* There's really nothing that can be done to
@ -2000,11 +1980,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
} }
if (unlikely(tlb_addr & TLB_WATCHPOINT)) { if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
QEMU_BUILD_BUG_ON(PAGE_READ != BP_MEM_READ); cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs,
QEMU_BUILD_BUG_ON(PAGE_WRITE != BP_MEM_WRITE); BP_MEM_READ | BP_MEM_WRITE, retaddr);
/* therefore prot == watchpoint bits */
cpu_check_watchpoint(env_cpu(env), addr, size,
full->attrs, prot, retaddr);
} }
return hostaddr; return hostaddr;
@ -2575,89 +2552,45 @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
return ret; return ret;
} }
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
uint16_t ret; uint16_t ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD); ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
plugin_load_cb(env, addr, oi); plugin_load_cb(env, addr, oi);
return ret; return ret;
} }
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
uint32_t ret; uint32_t ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD); ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
plugin_load_cb(env, addr, oi); plugin_load_cb(env, addr, oi);
return ret; return ret;
} }
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
uint64_t ret; uint64_t ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD); ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
plugin_load_cb(env, addr, oi); plugin_load_cb(env, addr, oi);
return ret; return ret;
} }
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr, Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{
uint16_t ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
plugin_load_cb(env, addr, oi);
return ret;
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint32_t ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
plugin_load_cb(env, addr, oi);
return ret;
}
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint64_t ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
plugin_load_cb(env, addr, oi);
return ret;
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{ {
Int128 ret; Int128 ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128)); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
ret = do_ld16_mmu(env, addr, oi, ra);
plugin_load_cb(env, addr, oi);
return ret;
}
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
Int128 ret;
tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
ret = do_ld16_mmu(env, addr, oi, ra); ret = do_ld16_mmu(env, addr, oi, ra);
plugin_load_cb(env, addr, oi); plugin_load_cb(env, addr, oi);
return ret; return ret;
@ -2779,7 +2712,7 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
case MO_ATOM_WITHIN16_PAIR: case MO_ATOM_WITHIN16_PAIR:
/* Since size > 8, this is the half that must be atomic. */ /* Since size > 8, this is the half that must be atomic. */
if (!HAVE_al16) { if (!HAVE_ATOMIC128_RW) {
cpu_loop_exit_atomic(env_cpu(env), ra); cpu_loop_exit_atomic(env_cpu(env), ra);
} }
return store_whole_le16(p->haddr, p->size, val_le); return store_whole_le16(p->haddr, p->size, val_le);
@ -3045,66 +2978,34 @@ void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
plugin_store_cb(env, addr, oi); plugin_store_cb(env, addr, oi);
} }
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val, void cpu_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
do_st2_mmu(env, addr, val, oi, retaddr); do_st2_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi); plugin_store_cb(env, addr, oi);
} }
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val, void cpu_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
do_st4_mmu(env, addr, val, oi, retaddr); do_st4_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi); plugin_store_cb(env, addr, oi);
} }
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val, void cpu_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
do_st8_mmu(env, addr, val, oi, retaddr); do_st8_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi); plugin_store_cb(env, addr, oi);
} }
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val, void cpu_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr) MemOpIdx oi, uintptr_t retaddr)
{ {
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW); tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
do_st2_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
do_st4_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
do_st8_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
do_st16_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi);
}
void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
do_st16_mmu(env, addr, val, oi, retaddr); do_st16_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, oi); plugin_store_cb(env, addr, oi);
} }
@ -3137,7 +3038,7 @@ void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
#include "atomic_template.h" #include "atomic_template.h"
#endif #endif
#if HAVE_CMPXCHG128 || HAVE_ATOMIC128 #if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
#define DATA_SIZE 16 #define DATA_SIZE 16
#include "atomic_template.h" #include "atomic_template.h"
#endif #endif
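
With the cpu_ld/st helpers unified above, the byte-swap direction now travels in the MemOpIdx rather than in the function name. A minimal caller-side sketch, using only interfaces visible in this diff (the wrapper name read_guest_be32 is made up for illustration):

    /* What used to be cpu_ldl_be_mmu(env, addr, oi, ra) becomes: */
    static uint32_t read_guest_be32(CPUArchState *env, abi_ptr addr,
                                    int mmu_idx, uintptr_t ra)
    {
        /* MO_BEUL requests a 4-byte big-endian access; the helper does
         * any needed byte swap internally. */
        MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
        return cpu_ldl_mmu(env, addr, oi, ra);
    }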

@ -16,35 +16,6 @@
#endif #endif
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8) #define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
/*
* If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
* that are supported by the host, e.g. s390x. We can force the pointer to
* have our known alignment with __builtin_assume_aligned, however prior to
* GCC 13 that was only reliable with optimization enabled. See
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
*/
#if defined(CONFIG_ATOMIC128_OPT)
# if !defined(__OPTIMIZE__)
# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
# endif
# define CONFIG_ATOMIC128
#endif
#ifndef ATTRIBUTE_ATOMIC128_OPT
# define ATTRIBUTE_ATOMIC128_OPT
#endif
#if defined(CONFIG_ATOMIC128)
# define HAVE_al16_fast true
#else
# define HAVE_al16_fast false
#endif
#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
# define HAVE_al16 true
#else
# define HAVE_al16 false
#endif
/** /**
* required_atomicity: * required_atomicity:
* *
@ -163,26 +134,6 @@ static inline uint64_t load_atomic8(void *pv)
return qatomic_read__nocheck(p); return qatomic_read__nocheck(p);
} }
/**
* load_atomic16:
* @pv: host address
*
* Atomically load 16 aligned bytes from @pv.
*/
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
load_atomic16(void *pv)
{
#ifdef CONFIG_ATOMIC128
__uint128_t *p = __builtin_assume_aligned(pv, 16);
Int128Alias r;
r.u = qatomic_read__nocheck(p);
return r.s;
#else
qemu_build_not_reached();
#endif
}
/** /**
* load_atomic8_or_exit: * load_atomic8_or_exit:
* @env: cpu context * @env: cpu context
@ -228,8 +179,8 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
{ {
Int128 *p = __builtin_assume_aligned(pv, 16); Int128 *p = __builtin_assume_aligned(pv, 16);
if (HAVE_al16_fast) { if (HAVE_ATOMIC128_RO) {
return load_atomic16(p); return atomic16_read_ro(p);
} }
#ifdef CONFIG_USER_ONLY #ifdef CONFIG_USER_ONLY
@ -249,14 +200,9 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
* In system mode all guest pages are writable, and for user-only * In system mode all guest pages are writable, and for user-only
* we have just checked writability. Try cmpxchg. * we have just checked writability. Try cmpxchg.
*/ */
#if defined(CONFIG_CMPXCHG128) if (HAVE_ATOMIC128_RW) {
/* Swap 0 with 0, with the side-effect of returning the old value. */ return atomic16_read_rw(p);
{
Int128Alias r;
r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
return r.s;
} }
#endif
/* Ultimate fallback: re-execute in serial context. */ /* Ultimate fallback: re-execute in serial context. */
cpu_loop_exit_atomic(env_cpu(env), ra); cpu_loop_exit_atomic(env_cpu(env), ra);
@ -377,11 +323,10 @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
load_atom_extract_al16_or_al8(void *pv, int s) load_atom_extract_al16_or_al8(void *pv, int s)
{ {
#if defined(CONFIG_ATOMIC128)
uintptr_t pi = (uintptr_t)pv; uintptr_t pi = (uintptr_t)pv;
int o = pi & 7; int o = pi & 7;
int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8; int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
__uint128_t r; Int128 r;
pv = (void *)(pi & ~7); pv = (void *)(pi & ~7);
if (pi & 8) { if (pi & 8) {
@ -390,18 +335,14 @@ load_atom_extract_al16_or_al8(void *pv, int s)
uint64_t b = qatomic_read__nocheck(p8 + 1); uint64_t b = qatomic_read__nocheck(p8 + 1);
if (HOST_BIG_ENDIAN) { if (HOST_BIG_ENDIAN) {
r = ((__uint128_t)a << 64) | b; r = int128_make128(b, a);
} else { } else {
r = ((__uint128_t)b << 64) | a; r = int128_make128(a, b);
} }
} else { } else {
__uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0); r = atomic16_read_ro(pv);
r = qatomic_read__nocheck(p16);
} }
return r >> shr; return int128_getlo(int128_urshift(r, shr));
#else
qemu_build_not_reached();
#endif
} }
/** /**
@ -489,7 +430,7 @@ static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
if (likely((pi & 1) == 0)) { if (likely((pi & 1) == 0)) {
return load_atomic2(pv); return load_atomic2(pv);
} }
if (HAVE_al16_fast) { if (HAVE_ATOMIC128_RO) {
return load_atom_extract_al16_or_al8(pv, 2); return load_atom_extract_al16_or_al8(pv, 2);
} }
@ -528,7 +469,7 @@ static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
if (likely((pi & 3) == 0)) { if (likely((pi & 3) == 0)) {
return load_atomic4(pv); return load_atomic4(pv);
} }
if (HAVE_al16_fast) { if (HAVE_ATOMIC128_RO) {
return load_atom_extract_al16_or_al8(pv, 4); return load_atom_extract_al16_or_al8(pv, 4);
} }
@ -574,7 +515,7 @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
if (HAVE_al8 && likely((pi & 7) == 0)) { if (HAVE_al8 && likely((pi & 7) == 0)) {
return load_atomic8(pv); return load_atomic8(pv);
} }
if (HAVE_al16_fast) { if (HAVE_ATOMIC128_RO) {
return load_atom_extract_al16_or_al8(pv, 8); return load_atom_extract_al16_or_al8(pv, 8);
} }
@ -624,8 +565,8 @@ static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
* If the host does not support 16-byte atomics, wait until we have * If the host does not support 16-byte atomics, wait until we have
* examined the atomicity parameters below. * examined the atomicity parameters below.
*/ */
if (HAVE_al16_fast && likely((pi & 15) == 0)) { if (HAVE_ATOMIC128_RO && likely((pi & 15) == 0)) {
return load_atomic16(pv); return atomic16_read_ro(pv);
} }
atmax = required_atomicity(env, pi, memop); atmax = required_atomicity(env, pi, memop);
@ -704,36 +645,6 @@ static inline void store_atomic8(void *pv, uint64_t val)
qatomic_set__nocheck(p, val); qatomic_set__nocheck(p, val);
} }
/**
* store_atomic16:
* @pv: host address
* @val: value to store
*
* Atomically store 16 aligned bytes to @pv.
*/
static inline void ATTRIBUTE_ATOMIC128_OPT
store_atomic16(void *pv, Int128Alias val)
{
#if defined(CONFIG_ATOMIC128)
__uint128_t *pu = __builtin_assume_aligned(pv, 16);
qatomic_set__nocheck(pu, val.u);
#elif defined(CONFIG_CMPXCHG128)
__uint128_t *pu = __builtin_assume_aligned(pv, 16);
__uint128_t o;
/*
* Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
* defer to libatomic, so we must use __sync_*_compare_and_swap_16
* and accept the sequential consistency that comes with it.
*/
do {
o = *pu;
} while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
#else
qemu_build_not_reached();
#endif
}
/** /**
* store_atom_4x2 * store_atom_4x2
*/ */
@ -974,7 +885,7 @@ static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
int sh = o * 8; int sh = o * 8;
Int128 m, v; Int128 m, v;
qemu_build_assert(HAVE_al16); qemu_build_assert(HAVE_ATOMIC128_RW);
/* Like MAKE_64BIT_MASK(0, sz), but larger. */ /* Like MAKE_64BIT_MASK(0, sz), but larger. */
if (sz <= 64) { if (sz <= 64) {
@ -1034,7 +945,7 @@ static void store_atom_2(CPUArchState *env, uintptr_t ra,
return; return;
} }
} else if ((pi & 15) == 7) { } else if ((pi & 15) == 7) {
if (HAVE_al16) { if (HAVE_ATOMIC128_RW) {
Int128 v = int128_lshift(int128_make64(val), 56); Int128 v = int128_lshift(int128_make64(val), 56);
Int128 m = int128_lshift(int128_make64(0xffff), 56); Int128 m = int128_lshift(int128_make64(0xffff), 56);
store_atom_insert_al16(pv - 7, v, m); store_atom_insert_al16(pv - 7, v, m);
@ -1103,7 +1014,7 @@ static void store_atom_4(CPUArchState *env, uintptr_t ra,
return; return;
} }
} else { } else {
if (HAVE_al16) { if (HAVE_ATOMIC128_RW) {
store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val))); store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
return; return;
} }
@ -1168,7 +1079,7 @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
} }
break; break;
case MO_64: case MO_64:
if (HAVE_al16) { if (HAVE_ATOMIC128_RW) {
store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val))); store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
return; return;
} }
@ -1194,8 +1105,8 @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
uint64_t a, b; uint64_t a, b;
int atmax; int atmax;
if (HAVE_al16_fast && likely((pi & 15) == 0)) { if (HAVE_ATOMIC128_RW && likely((pi & 15) == 0)) {
store_atomic16(pv, val); atomic16_set(pv, val);
return; return;
} }
@ -1223,7 +1134,7 @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
} }
break; break;
case -MO_64: case -MO_64:
if (HAVE_al16) { if (HAVE_ATOMIC128_RW) {
uint64_t val_le; uint64_t val_le;
int s2 = pi & 15; int s2 = pi & 15;
int s1 = 16 - s2; int s1 = 16 - s2;
@ -1250,8 +1161,8 @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
} }
break; break;
case MO_128: case MO_128:
if (HAVE_al16) { if (HAVE_ATOMIC128_RW) {
store_atomic16(pv, val); atomic16_set(pv, val);
return; return;
} }
break; break;

@ -26,7 +26,7 @@ uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
return cpu_ldw_be_mmu(env, addr, oi, ra); return cpu_ldw_mmu(env, addr, oi, ra);
} }
int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
@ -39,21 +39,21 @@ uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
return cpu_ldl_be_mmu(env, addr, oi, ra); return cpu_ldl_mmu(env, addr, oi, ra);
} }
uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
return cpu_ldq_be_mmu(env, addr, oi, ra); return cpu_ldq_mmu(env, addr, oi, ra);
} }
uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
return cpu_ldw_le_mmu(env, addr, oi, ra); return cpu_ldw_mmu(env, addr, oi, ra);
} }
int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
@ -66,14 +66,14 @@ uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
return cpu_ldl_le_mmu(env, addr, oi, ra); return cpu_ldl_mmu(env, addr, oi, ra);
} }
uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
return cpu_ldq_le_mmu(env, addr, oi, ra); return cpu_ldq_mmu(env, addr, oi, ra);
} }
void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
@ -87,42 +87,42 @@ void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
cpu_stw_be_mmu(env, addr, val, oi, ra); cpu_stw_mmu(env, addr, val, oi, ra);
} }
void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
cpu_stl_be_mmu(env, addr, val, oi, ra); cpu_stl_mmu(env, addr, val, oi, ra);
} }
void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
cpu_stq_be_mmu(env, addr, val, oi, ra); cpu_stq_mmu(env, addr, val, oi, ra);
} }
void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
cpu_stw_le_mmu(env, addr, val, oi, ra); cpu_stw_mmu(env, addr, val, oi, ra);
} }
void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
cpu_stl_le_mmu(env, addr, val, oi, ra); cpu_stl_mmu(env, addr, val, oi, ra);
} }
void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
int mmu_idx, uintptr_t ra) int mmu_idx, uintptr_t ra)
{ {
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx); MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
cpu_stq_le_mmu(env, addr, val, oi, ra); cpu_stq_mmu(env, addr, val, oi, ra);
} }
/*--------------------------*/ /*--------------------------*/

@ -65,9 +65,7 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32) i128, env, i64, i128, i128, i32)
#endif #endif
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32)
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32) i128, env, i64, i128, i128, i32)
#ifdef CONFIG_ATOMIC64 #ifdef CONFIG_ATOMIC64

@ -432,7 +432,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif #endif
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
qemu_log_in_addr_range(pc)) { qemu_log_in_addr_range(pc)) {
FILE *logfile = qemu_log_trylock(); FILE *logfile = qemu_log_trylock();
@ -505,7 +504,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
qemu_log_unlock(logfile); qemu_log_unlock(logfile);
} }
} }
#endif
qatomic_set(&tcg_ctx->code_gen_ptr, (void *) qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,

@ -122,7 +122,6 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
tb->size = db->pc_next - db->pc_first; tb->size = db->pc_next - db->pc_first;
tb->icount = db->num_insns; tb->icount = db->num_insns;
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
&& qemu_log_in_addr_range(db->pc_first)) { && qemu_log_in_addr_range(db->pc_first)) {
FILE *logfile = qemu_log_trylock(); FILE *logfile = qemu_log_trylock();
@ -133,7 +132,6 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
qemu_log_unlock(logfile); qemu_log_unlock(logfile);
} }
} }
#endif
} }
static void *translator_access(CPUArchState *env, DisasContextBase *db, static void *translator_access(CPUArchState *env, DisasContextBase *db,

@ -940,8 +940,8 @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
return ret; return ret;
} }
static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr, static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra) MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
uint16_t ret; uint16_t ret;
@ -950,59 +950,35 @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_2(env, ra, haddr, mop); ret = load_atom_2(env, ra, haddr, mop);
clear_helper_retaddr(); clear_helper_retaddr();
if (mop & MO_BSWAP) {
ret = bswap16(ret);
}
return ret; return ret;
} }
tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr, tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); return do_ld2_mmu(env, addr, get_memop(oi), ra);
uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap16(ret);
}
return ret;
} }
tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr, tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); return (int16_t)do_ld2_mmu(env, addr, get_memop(oi), ra);
int16_t ret = do_ld2_he_mmu(env, addr, mop, ra); }
if (mop & MO_BSWAP) { uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
ret = bswap16(ret); MemOpIdx oi, uintptr_t ra)
} {
uint16_t ret = do_ld2_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret; return ret;
} }
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOp mop, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint16_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld2_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_be16(ret);
}
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint16_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld2_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_le16(ret);
}
static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
uint32_t ret; uint32_t ret;
@ -1011,59 +987,35 @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_4(env, ra, haddr, mop); ret = load_atom_4(env, ra, haddr, mop);
clear_helper_retaddr(); clear_helper_retaddr();
if (mop & MO_BSWAP) {
ret = bswap32(ret);
}
return ret; return ret;
} }
tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr, tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); return do_ld4_mmu(env, addr, get_memop(oi), ra);
uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap32(ret);
}
return ret;
} }
tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr, tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); return (int32_t)do_ld4_mmu(env, addr, get_memop(oi), ra);
int32_t ret = do_ld4_he_mmu(env, addr, mop, ra); }
if (mop & MO_BSWAP) { uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
ret = bswap32(ret); MemOpIdx oi, uintptr_t ra)
} {
uint32_t ret = do_ld4_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret; return ret;
} }
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOp mop, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint32_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld4_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_be32(ret);
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint32_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld4_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_le32(ret);
}
static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
uint64_t ret; uint64_t ret;
@ -1072,14 +1024,6 @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_8(env, ra, haddr, mop); ret = load_atom_8(env, ra, haddr, mop);
clear_helper_retaddr(); clear_helper_retaddr();
return ret;
}
uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) { if (mop & MO_BSWAP) {
ret = bswap64(ret); ret = bswap64(ret);
@ -1087,32 +1031,22 @@ uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
return ret; return ret;
} }
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); return do_ld8_mmu(env, addr, get_memop(oi), ra);
uint64_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld8_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_be64(ret);
} }
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); uint64_t ret = do_ld8_mmu(env, addr, get_memop(oi), ra);
uint64_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld8_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_le64(ret); return ret;
} }
static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr, static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra) MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
Int128 ret; Int128 ret;
@ -1121,14 +1055,6 @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_16(env, ra, haddr, mop); ret = load_atom_16(env, ra, haddr, mop);
clear_helper_retaddr(); clear_helper_retaddr();
return ret;
}
Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) { if (mop & MO_BSWAP) {
ret = bswap128(ret); ret = bswap128(ret);
@ -1136,38 +1062,22 @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
return ret; return ret;
} }
Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld16_mmu(env, addr, get_memop(oi), ra);
}
Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi) Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
{ {
return helper_ld16_mmu(env, addr, oi, GETPC()); return helper_ld16_mmu(env, addr, oi, GETPC());
} }
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); Int128 ret = do_ld16_mmu(env, addr, get_memop(oi), ra);
Int128 ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld16_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (!HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret;
}
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
Int128 ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld16_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret; return ret;
} }
@ -1195,13 +1105,17 @@ void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
} }
static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOp mop, uintptr_t ra) MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_16); tcg_debug_assert((mop & MO_SIZE) == MO_16);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap16(val);
}
store_atom_2(env, ra, haddr, mop, val); store_atom_2(env, ra, haddr, mop, val);
clear_helper_retaddr(); clear_helper_retaddr();
} }
@ -1209,41 +1123,27 @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val, void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st2_mmu(env, addr, val, get_memop(oi), ra);
if (mop & MO_BSWAP) {
val = bswap16(val);
}
do_st2_he_mmu(env, addr, val, mop, ra);
} }
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st2_mmu(env, addr, val, get_memop(oi), ra);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
} }
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra) MemOp mop, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_32); tcg_debug_assert((mop & MO_SIZE) == MO_32);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap32(val);
}
store_atom_4(env, ra, haddr, mop, val); store_atom_4(env, ra, haddr, mop, val);
clear_helper_retaddr(); clear_helper_retaddr();
} }
@ -1251,41 +1151,27 @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val, void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st4_mmu(env, addr, val, get_memop(oi), ra);
if (mop & MO_BSWAP) {
val = bswap32(val);
}
do_st4_he_mmu(env, addr, val, mop, ra);
} }
void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st4_mmu(env, addr, val, get_memop(oi), ra);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
} }
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra) MemOp mop, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_64); tcg_debug_assert((mop & MO_SIZE) == MO_64);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap64(val);
}
store_atom_8(env, ra, haddr, mop, val); store_atom_8(env, ra, haddr, mop, val);
clear_helper_retaddr(); clear_helper_retaddr();
} }
@ -1293,41 +1179,27 @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val, void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st8_mmu(env, addr, val, get_memop(oi), ra);
if (mop & MO_BSWAP) {
val = bswap64(val);
}
do_st8_he_mmu(env, addr, val, mop, ra);
} }
void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st8_mmu(env, addr, val, get_memop(oi), ra);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
} }
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra) MemOp mop, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOp mop, uintptr_t ra)
{ {
void *haddr; void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_128); tcg_debug_assert((mop & MO_SIZE) == MO_128);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap128(val);
}
store_atom_16(env, ra, haddr, mop, val); store_atom_16(env, ra, haddr, mop, val);
clear_helper_retaddr(); clear_helper_retaddr();
} }
@ -1335,12 +1207,7 @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val, void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
MemOpIdx oi, uintptr_t ra) MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st16_mmu(env, addr, val, get_memop(oi), ra);
if (mop & MO_BSWAP) {
val = bswap128(val);
}
do_st16_he_mmu(env, addr, val, mop, ra);
} }
void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi) void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
@ -1348,29 +1215,10 @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
helper_st16_mmu(env, addr, val, oi, GETPC()); helper_st16_mmu(env, addr, val, oi, GETPC());
} }
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, void cpu_st16_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra) Int128 val, MemOpIdx oi, uintptr_t ra)
{ {
MemOp mop = get_memop(oi); do_st16_mmu(env, addr, val, get_memop(oi), ra);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
if (!HOST_BIG_ENDIAN) {
val = bswap128(val);
}
do_st16_he_mmu(env, addr, val, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
if (HOST_BIG_ENDIAN) {
val = bswap128(val);
}
do_st16_he_mmu(env, addr, val, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
} }
@ -1475,12 +1323,9 @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
/* /*
* Do not allow unaligned operations to proceed. Return the host address. * Do not allow unaligned operations to proceed. Return the host address.
*
* @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
*/ */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, int size, int prot, MemOpIdx oi, int size, uintptr_t retaddr)
uintptr_t retaddr)
{ {
MemOp mop = get_memop(oi); MemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop); int a_bits = get_alignment_bits(mop);
@ -1488,8 +1333,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
/* Enforce guest required alignment. */ /* Enforce guest required alignment. */
if (unlikely(addr & ((1 << a_bits) - 1))) { if (unlikely(addr & ((1 << a_bits) - 1))) {
MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE; cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, retaddr);
cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr);
} }
/* Enforce qemu required alignment. */ /* Enforce qemu required alignment. */
@ -1527,7 +1371,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
#include "atomic_template.h" #include "atomic_template.h"
#endif #endif
#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 #if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
#define DATA_SIZE 16 #define DATA_SIZE 16
#include "atomic_template.h" #include "atomic_template.h"
#endif #endif

View File

@ -0,0 +1,45 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Compare-and-swap for 128-bit atomic operations, AArch64 version.
*
* Copyright (C) 2018, 2023 Linaro, Ltd.
*
* See docs/devel/atomics.rst for discussion about the guarantees each
* atomic primitive is meant to provide.
*/
#ifndef AARCH64_ATOMIC128_CAS_H
#define AARCH64_ATOMIC128_CAS_H
/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
#include "host/include/generic/host/atomic128-cas.h"
#else
static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
{
uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
uint64_t oldl, oldh;
uint32_t tmp;
asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
"cmp %[oldl], %[cmpl]\n\t"
"ccmp %[oldh], %[cmph], #0, eq\n\t"
"b.ne 1f\n\t"
"stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
"cbnz %w[tmp], 0b\n"
"1:"
: [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
[oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
: [cmpl] "r"(cmpl), [cmph] "r"(cmph),
[newl] "r"(newl), [newh] "r"(newh)
: "memory", "cc");
return int128_make128(oldl, oldh);
}
# define CONFIG_CMPXCHG128 1
# define HAVE_CMPXCHG128 1
#endif
#endif /* AARCH64_ATOMIC128_CAS_H */
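
A minimal usage sketch (illustrative, not from the patch): a caller that needs a lock-free read-modify-write of a 16-byte-aligned value loops on atomic16_cmpxchg until its compare succeeds. The helper name bump_lo64 is an assumption; the int128_* accessors come from qemu/int128.h and atomic16_read_rw from atomic128-ldst.h.

    /* Hypothetical sketch: add 'inc' to the low 64 bits of a 16-byte,
     * 16-byte-aligned counter, retrying if another CPU raced with us. */
    static inline void bump_lo64(Int128 *ctr, uint64_t inc)
    {
        Int128 old = atomic16_read_rw(ctr);

        for (;;) {
            Int128 new = int128_make128(int128_getlo(old) + inc,
                                        int128_gethi(old));
            Int128 seen = atomic16_cmpxchg(ctr, old, new);
            if (int128_eq(seen, old)) {
                break;              /* our update landed */
            }
            old = seen;             /* lost the race; retry with new value */
        }
    }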

View File

@ -0,0 +1,79 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Load/store for 128-bit atomic operations, AArch64 version.
*
* Copyright (C) 2018, 2023 Linaro, Ltd.
*
* See docs/devel/atomics.rst for discussion about the guarantees each
* atomic primitive is meant to provide.
*/
#ifndef AARCH64_ATOMIC128_LDST_H
#define AARCH64_ATOMIC128_LDST_H
#include "host/cpuinfo.h"
#include "tcg/debug-assert.h"
/*
* Through gcc 10, aarch64 has no support for 128-bit atomics.
* Through clang 16, without -march=armv8.4-a, __atomic_load_16
* is incorrectly expanded to a read-write operation.
*
* Anyway, this method allows runtime detection of FEAT_LSE2.
*/
#define HAVE_ATOMIC128_RO (cpuinfo & CPUINFO_LSE2)
#define HAVE_ATOMIC128_RW 1
static inline Int128 atomic16_read_ro(const Int128 *ptr)
{
uint64_t l, h;
tcg_debug_assert(HAVE_ATOMIC128_RO);
/* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
asm("ldp %[l], %[h], %[mem]"
: [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
return int128_make128(l, h);
}
static inline Int128 atomic16_read_rw(Int128 *ptr)
{
uint64_t l, h;
uint32_t tmp;
if (cpuinfo & CPUINFO_LSE2) {
/* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
asm("ldp %[l], %[h], %[mem]"
: [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
} else {
/* The load must be paired with the store to guarantee not tearing. */
asm("0: ldxp %[l], %[h], %[mem]\n\t"
"stxp %w[tmp], %[l], %[h], %[mem]\n\t"
"cbnz %w[tmp], 0b"
: [mem] "+m"(*ptr), [tmp] "=&r"(tmp), [l] "=&r"(l), [h] "=&r"(h));
}
return int128_make128(l, h);
}
static inline void atomic16_set(Int128 *ptr, Int128 val)
{
uint64_t l = int128_getlo(val), h = int128_gethi(val);
uint64_t t1, t2;
if (cpuinfo & CPUINFO_LSE2) {
/* With FEAT_LSE2, 16-byte aligned STP is atomic. */
asm("stp %[l], %[h], %[mem]"
: [mem] "=m"(*ptr) : [l] "r"(l), [h] "r"(h));
} else {
/* Load into temporaries to acquire the exclusive access lock. */
asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
"stxp %w[t1], %[l], %[h], %[mem]\n\t"
"cbnz %w[t1], 0b"
: [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
: [l] "r"(l), [h] "r"(h));
}
}
#endif /* AARCH64_ATOMIC128_LDST_H */
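
A sketch of how the RO/RW split is meant to be consumed (the function name and page_writable flag are assumptions): with FEAT_LSE2 an aligned LDP is atomic and safe on read-only mappings, while the LDXP/STXP fallback writes the value back and so needs a writable page.

    /* Hypothetical sketch: load 16 bytes atomically from memory that may
     * be mapped read-only. */
    static Int128 load16_maybe_ro(Int128 *p, bool page_writable)
    {
        if (HAVE_ATOMIC128_RO) {
            return atomic16_read_ro(p);
        }
        if (page_writable) {
            return atomic16_read_rw(p);
        }
        /* Caller must fall back to a non-atomic path, e.g. under a lock. */
        g_assert_not_reached();
    }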

View File

@ -0,0 +1,22 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Host specific cpu identification for AArch64.
*/
#ifndef HOST_CPUINFO_H
#define HOST_CPUINFO_H
#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
#define CPUINFO_LSE (1u << 1)
#define CPUINFO_LSE2 (1u << 2)
/* Initialized with a constructor. */
extern unsigned cpuinfo;
/*
* We cannot rely on constructor ordering, so other constructors must
* use the function interface rather than the variable above.
*/
unsigned cpuinfo_init(void);
#endif /* HOST_CPUINFO_H */
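
A minimal sketch of the constructor-ordering point above (the flag and constructor names are illustrative): code that runs from its own constructor cannot assume the cpuinfo variable is populated yet, so it calls cpuinfo_init() instead.

    /* Hypothetical sketch: another translation unit caching a feature bit. */
    static bool use_lse2_fast_path;

    static void __attribute__((constructor)) init_my_fast_path(void)
    {
        use_lse2_fast_path = (cpuinfo_init() & CPUINFO_LSE2) != 0;
    }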

View File

@ -0,0 +1,47 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Compare-and-swap for 128-bit atomic operations, generic version.
*
* Copyright (C) 2018, 2023 Linaro, Ltd.
*
* See docs/devel/atomics.rst for discussion about the guarantees each
* atomic primitive is meant to provide.
*/
#ifndef HOST_ATOMIC128_CAS_H
#define HOST_ATOMIC128_CAS_H
#if defined(CONFIG_ATOMIC128)
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias r, c, n;
c.s = cmp;
n.s = new;
r.i = qatomic_cmpxchg__nocheck(ptr_align, c.i, n.i);
return r.s;
}
# define HAVE_CMPXCHG128 1
#elif defined(CONFIG_CMPXCHG128)
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias r, c, n;
c.s = cmp;
n.s = new;
r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
return r.s;
}
# define HAVE_CMPXCHG128 1
#else
/* Fallback definition that must be optimized away, or error. */
Int128 QEMU_ERROR("unsupported atomic")
atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
# define HAVE_CMPXCHG128 0
#endif
#endif /* HOST_ATOMIC128_CAS_H */
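
A sketch of the intended call pattern, assuming a helper of this shape (the name cas16_or_exit is illustrative): HAVE_CMPXCHG128 is a compile-time constant per host, so callers test it and bail out to the exclusive-execution restart path when it is 0, the same shape the target helpers use.

    /* Hypothetical sketch: 16-byte CAS guarded by HAVE_CMPXCHG128. */
    static Int128 cas16_or_exit(CPUState *cs, Int128 *haddr,
                                Int128 cmp, Int128 new, uintptr_t ra)
    {
        if (HAVE_CMPXCHG128) {
            return atomic16_cmpxchg(haddr, cmp, new);
        }
        cpu_loop_exit_atomic(cs, ra);   /* restart under exclusive lock */
    }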

View File

@ -0,0 +1,81 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Load/store for 128-bit atomic operations, generic version.
*
* Copyright (C) 2018, 2023 Linaro, Ltd.
*
* See docs/devel/atomics.rst for discussion about the guarantees each
* atomic primitive is meant to provide.
*/
#ifndef HOST_ATOMIC128_LDST_H
#define HOST_ATOMIC128_LDST_H
#if defined(CONFIG_ATOMIC128)
# define HAVE_ATOMIC128_RO 1
# define HAVE_ATOMIC128_RW 1
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_read_ro(const Int128 *ptr)
{
const __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias r;
r.i = qatomic_read__nocheck(ptr_align);
return r.s;
}
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_read_rw(Int128 *ptr)
{
return atomic16_read_ro(ptr);
}
static inline void ATTRIBUTE_ATOMIC128_OPT
atomic16_set(Int128 *ptr, Int128 val)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias v;
v.s = val;
qatomic_set__nocheck(ptr_align, v.i);
}
#elif defined(CONFIG_CMPXCHG128)
# define HAVE_ATOMIC128_RO 0
# define HAVE_ATOMIC128_RW 1
Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_read_rw(Int128 *ptr)
{
/* Maybe replace 0 with 0, returning the old value. */
Int128 z = int128_make64(0);
return atomic16_cmpxchg(ptr, z, z);
}
static inline void ATTRIBUTE_ATOMIC128_OPT
atomic16_set(Int128 *ptr, Int128 val)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
__int128_t old;
Int128Alias new;
new.s = val;
do {
old = *ptr_align;
} while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
}
#else
# define HAVE_ATOMIC128_RO 0
# define HAVE_ATOMIC128_RW 0
/* Fallback definitions that must be optimized away, or error. */
Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
Int128 QEMU_ERROR("unsupported atomic") atomic16_read_rw(Int128 *ptr);
void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
#endif
#endif /* HOST_ATOMIC128_LDST_H */

View File

@ -0,0 +1,4 @@
/*
* No host specific cpu identification.
* SPDX-License-Identifier: GPL-2.0-or-later
*/

View File

@ -0,0 +1,39 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Host specific cpu identification for x86.
*/
#ifndef HOST_CPUINFO_H
#define HOST_CPUINFO_H
/* Digested version of <cpuid.h> */
#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
#define CPUINFO_CMOV (1u << 1)
#define CPUINFO_MOVBE (1u << 2)
#define CPUINFO_LZCNT (1u << 3)
#define CPUINFO_POPCNT (1u << 4)
#define CPUINFO_BMI1 (1u << 5)
#define CPUINFO_BMI2 (1u << 6)
#define CPUINFO_SSE2 (1u << 7)
#define CPUINFO_SSE4 (1u << 8)
#define CPUINFO_AVX1 (1u << 9)
#define CPUINFO_AVX2 (1u << 10)
#define CPUINFO_AVX512F (1u << 11)
#define CPUINFO_AVX512VL (1u << 12)
#define CPUINFO_AVX512BW (1u << 13)
#define CPUINFO_AVX512DQ (1u << 14)
#define CPUINFO_AVX512VBMI2 (1u << 15)
#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
#define CPUINFO_ATOMIC_VMOVDQU (1u << 17)
/* Initialized with a constructor. */
extern unsigned cpuinfo;
/*
* We cannot rely on constructor ordering, so other constructors must
* use the function interface rather than the variable above.
*/
unsigned cpuinfo_init(void);
#endif /* HOST_CPUINFO_H */
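
A sketch of how these digested bits are meant to be consumed, modelled on the bufferiszero/xbzrle users of this series; the buffer_zero_* routine names are assumptions for illustration only.

    /* Hypothetical sketch: pick an accelerated implementation once, at
     * startup, from the digested feature bits. */
    static bool (*buffer_is_zero_accel)(const void *, size_t);

    static void __attribute__((constructor)) init_buffer_is_zero(void)
    {
        unsigned info = cpuinfo_init();

        if (info & CPUINFO_AVX2) {
            buffer_is_zero_accel = buffer_zero_avx2;
        } else if (info & CPUINFO_SSE2) {
            buffer_is_zero_accel = buffer_zero_sse2;
        } else {
            buffer_is_zero_accel = buffer_zero_int;
        }
    }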

View File

@ -0,0 +1 @@
#include "host/include/i386/host/cpuinfo.h"

View File

@ -207,43 +207,21 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val,
int mmu_idx, uintptr_t ra); int mmu_idx, uintptr_t ra);
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra); uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
MemOpIdx oi, uintptr_t ra); uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr ptr, uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
MemOpIdx oi, uintptr_t ra); Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra);
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val, void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
MemOpIdx oi, uintptr_t ra); MemOpIdx oi, uintptr_t ra);
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, void cpu_stw_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
MemOpIdx oi, uintptr_t ra); MemOpIdx oi, uintptr_t ra);
void cpu_stl_be_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val, void cpu_stl_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
MemOpIdx oi, uintptr_t ra); MemOpIdx oi, uintptr_t ra);
void cpu_stq_be_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val, void cpu_stq_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
MemOpIdx oi, uintptr_t ra); MemOpIdx oi, uintptr_t ra);
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra); MemOpIdx oi, uintptr_t ra);
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
MemOpIdx oi, uintptr_t ra);
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
MemOpIdx oi, uintptr_t ra);
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra);
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOpIdx oi, uintptr_t ra);
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv, uint32_t cmpv, uint32_t newv,
@ -322,15 +300,6 @@ Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv, Int128 cmpv, Int128 newv,
MemOpIdx oi, uintptr_t retaddr); MemOpIdx oi, uintptr_t retaddr);
Int128 cpu_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr);
Int128 cpu_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr);
void cpu_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr);
void cpu_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr);
#if defined(CONFIG_USER_ONLY) #if defined(CONFIG_USER_ONLY)
extern __thread uintptr_t helper_retaddr; extern __thread uintptr_t helper_retaddr;
@ -416,9 +385,6 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra # define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra
# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra # define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra
# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra # define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra
# define cpu_ldw_mmu cpu_ldw_be_mmu
# define cpu_ldl_mmu cpu_ldl_be_mmu
# define cpu_ldq_mmu cpu_ldq_be_mmu
# define cpu_stw_data cpu_stw_be_data # define cpu_stw_data cpu_stw_be_data
# define cpu_stl_data cpu_stl_be_data # define cpu_stl_data cpu_stl_be_data
# define cpu_stq_data cpu_stq_be_data # define cpu_stq_data cpu_stq_be_data
@ -428,9 +394,6 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra # define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra
# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra # define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra
# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra # define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra
# define cpu_stw_mmu cpu_stw_be_mmu
# define cpu_stl_mmu cpu_stl_be_mmu
# define cpu_stq_mmu cpu_stq_be_mmu
#else #else
# define cpu_lduw_data cpu_lduw_le_data # define cpu_lduw_data cpu_lduw_le_data
# define cpu_ldsw_data cpu_ldsw_le_data # define cpu_ldsw_data cpu_ldsw_le_data
@ -444,9 +407,6 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra # define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra
# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra # define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra
# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra # define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra
# define cpu_ldw_mmu cpu_ldw_le_mmu
# define cpu_ldl_mmu cpu_ldl_le_mmu
# define cpu_ldq_mmu cpu_ldq_le_mmu
# define cpu_stw_data cpu_stw_le_data # define cpu_stw_data cpu_stw_le_data
# define cpu_stl_data cpu_stl_le_data # define cpu_stl_data cpu_stl_le_data
# define cpu_stq_data cpu_stq_le_data # define cpu_stq_data cpu_stq_le_data
@ -456,9 +416,6 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra # define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra
# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra # define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra
# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra # define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra
# define cpu_stw_mmu cpu_stw_le_mmu
# define cpu_stl_mmu cpu_stl_le_mmu
# define cpu_stq_mmu cpu_stq_le_mmu
#endif #endif
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
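
A minimal sketch of the unified interface (the helper name increment_qword is illustrative): the _be/_le entry points are gone, and endianness now travels inside the MemOpIdx, so one cpu_ldq_mmu/cpu_stq_mmu pair covers both byte orders.

    /* Hypothetical sketch: read-modify-write a target-endian quadword. */
    static uint64_t increment_qword(CPUArchState *env, abi_ptr addr,
                                    int mmu_idx, uintptr_t ra)
    {
        MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mmu_idx);
        uint64_t v = cpu_ldq_mmu(env, addr, oi, ra);

        cpu_stq_mmu(env, addr, v + 1, oi, ra);
        return v;
    }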

View File

@ -27,9 +27,6 @@
#include "qemu/interval-tree.h" #include "qemu/interval-tree.h"
#include "qemu/clang-tsa.h" #include "qemu/clang-tsa.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
/* Page tracking code uses ram addresses in system mode, and virtual /* Page tracking code uses ram addresses in system mode, and virtual
addresses in userspace mode. Define tb_page_addr_t to be an appropriate addresses in userspace mode. Define tb_page_addr_t to be an appropriate
type. */ type. */

View File

@ -15,6 +15,23 @@
#include "qemu/int128.h" #include "qemu/int128.h"
/*
* If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
* that are supported by the host, e.g. s390x. We can force the pointer to
* have our known alignment with __builtin_assume_aligned, however prior to
* GCC 13 that was only reliable with optimization enabled. See
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
*/
#if defined(CONFIG_ATOMIC128_OPT)
# if !defined(__OPTIMIZE__)
# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
# endif
# define CONFIG_ATOMIC128
#endif
#ifndef ATTRIBUTE_ATOMIC128_OPT
# define ATTRIBUTE_ATOMIC128_OPT
#endif
/* /*
* GCC is a house divided about supporting large atomic operations. * GCC is a house divided about supporting large atomic operations.
* *
@ -41,132 +58,7 @@
* Therefore, special case each platform. * Therefore, special case each platform.
*/ */
#if defined(CONFIG_ATOMIC128) #include "host/atomic128-cas.h"
static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) #include "host/atomic128-ldst.h"
{
Int128Alias r, c, n;
c.s = cmp;
n.s = new;
r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
return r.s;
}
# define HAVE_CMPXCHG128 1
#elif defined(CONFIG_CMPXCHG128)
static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
{
Int128Alias r, c, n;
c.s = cmp;
n.s = new;
r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
return r.s;
}
# define HAVE_CMPXCHG128 1
#elif defined(__aarch64__)
/* Through gcc 8, aarch64 has no support for 128-bit at all. */
static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
{
uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
uint64_t oldl, oldh;
uint32_t tmp;
asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
"cmp %[oldl], %[cmpl]\n\t"
"ccmp %[oldh], %[cmph], #0, eq\n\t"
"b.ne 1f\n\t"
"stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
"cbnz %w[tmp], 0b\n"
"1:"
: [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
[oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
: [cmpl] "r"(cmpl), [cmph] "r"(cmph),
[newl] "r"(newl), [newh] "r"(newh)
: "memory", "cc");
return int128_make128(oldl, oldh);
}
# define HAVE_CMPXCHG128 1
#else
/* Fallback definition that must be optimized away, or error. */
Int128 QEMU_ERROR("unsupported atomic")
atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
# define HAVE_CMPXCHG128 0
#endif /* Some definition for HAVE_CMPXCHG128 */
#if defined(CONFIG_ATOMIC128)
static inline Int128 atomic16_read(Int128 *ptr)
{
Int128Alias r;
r.i = qatomic_read__nocheck((__int128_t *)ptr);
return r.s;
}
static inline void atomic16_set(Int128 *ptr, Int128 val)
{
Int128Alias v;
v.s = val;
qatomic_set__nocheck((__int128_t *)ptr, v.i);
}
# define HAVE_ATOMIC128 1
#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
/* We can do better than cmpxchg for AArch64. */
static inline Int128 atomic16_read(Int128 *ptr)
{
uint64_t l, h;
uint32_t tmp;
/* The load must be paired with the store to guarantee not tearing. */
asm("0: ldxp %[l], %[h], %[mem]\n\t"
"stxp %w[tmp], %[l], %[h], %[mem]\n\t"
"cbnz %w[tmp], 0b"
: [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
return int128_make128(l, h);
}
static inline void atomic16_set(Int128 *ptr, Int128 val)
{
uint64_t l = int128_getlo(val), h = int128_gethi(val);
uint64_t t1, t2;
/* Load into temporaries to acquire the exclusive access lock. */
asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
"stxp %w[t1], %[l], %[h], %[mem]\n\t"
"cbnz %w[t1], 0b"
: [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
: [l] "r"(l), [h] "r"(h));
}
# define HAVE_ATOMIC128 1
#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
static inline Int128 atomic16_read(Int128 *ptr)
{
/* Maybe replace 0 with 0, returning the old value. */
Int128 z = int128_make64(0);
return atomic16_cmpxchg(ptr, z, z);
}
static inline void atomic16_set(Int128 *ptr, Int128 val)
{
Int128 old = *ptr, cmp;
do {
cmp = old;
old = atomic16_cmpxchg(ptr, cmp, val);
} while (int128_ne(old, cmp));
}
# define HAVE_ATOMIC128 1
#else
/* Fallback definitions that must be optimized away, or error. */
Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
# define HAVE_ATOMIC128 0
#endif /* Some definition for HAVE_ATOMIC128 */
#endif /* QEMU_ATOMIC128_H */ #endif /* QEMU_ATOMIC128_H */

View File

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: MIT */
/*
* Define tcg_debug_assert
* Copyright (c) 2008 Fabrice Bellard
*/
#ifndef TCG_DEBUG_ASSERT_H
#define TCG_DEBUG_ASSERT_H
#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
# define tcg_debug_assert(X) do { assert(X); } while (0)
#else
# define tcg_debug_assert(X) \
do { if (!(X)) { __builtin_unreachable(); } } while (0)
#endif
#endif
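
A short sketch of what the two definitions buy (the nth_byte helper is illustrative): with CONFIG_DEBUG_TCG the condition is really checked at runtime, while normal builds only feed __builtin_unreachable() so the compiler can discard out-of-range paths.

    /* Hypothetical sketch: assert-guarded helper. */
    static inline uint8_t nth_byte(uint32_t word, int n)
    {
        tcg_debug_assert(n >= 0 && n < 4);
        return (word >> (n * 8)) & 0xff;
    }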

View File

@ -34,6 +34,7 @@
#include "tcg/tcg-mo.h" #include "tcg/tcg-mo.h"
#include "tcg-target.h" #include "tcg-target.h"
#include "tcg/tcg-cond.h" #include "tcg/tcg-cond.h"
#include "tcg/debug-assert.h"
/* XXX: make safe guess about sizes */ /* XXX: make safe guess about sizes */
#define MAX_OP_PER_INSTR 266 #define MAX_OP_PER_INSTR 266
@ -222,14 +223,6 @@ typedef uint64_t tcg_insn_unit;
/* The port better have done this. */ /* The port better have done this. */
#endif #endif
#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
# define tcg_debug_assert(X) do { assert(X); } while (0)
#else
# define tcg_debug_assert(X) \
do { if (!(X)) { __builtin_unreachable(); } } while (0)
#endif
typedef struct TCGRelocation TCGRelocation; typedef struct TCGRelocation TCGRelocation;
struct TCGRelocation { struct TCGRelocation {
QSIMPLEQ_ENTRY(TCGRelocation) next; QSIMPLEQ_ENTRY(TCGRelocation) next;

View File

@ -512,6 +512,16 @@ add_project_arguments('-iquote', '.',
'-iquote', meson.current_source_dir() / 'include', '-iquote', meson.current_source_dir() / 'include',
language: all_languages) language: all_languages)
# If a host-specific include directory exists, list that first...
host_include = meson.current_source_dir() / 'host/include/'
if fs.is_dir(host_include / host_arch)
add_project_arguments('-iquote', host_include / host_arch,
language: all_languages)
endif
# ... followed by the generic fallback.
add_project_arguments('-iquote', host_include / 'generic',
language: all_languages)
sparse = find_program('cgcc', required: get_option('sparse')) sparse = find_program('cgcc', required: get_option('sparse'))
if sparse.found() if sparse.found()
run_target('sparse', run_target('sparse',
@ -2547,7 +2557,7 @@ if has_int128
# __alignof(unsigned __int128) for the host. # __alignof(unsigned __int128) for the host.
atomic_test_128 = ''' atomic_test_128 = '''
int main(int ac, char **av) { int main(int ac, char **av) {
unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], sizeof(16)); unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED); p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED); __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

View File

@ -8,7 +8,6 @@ migration_files = files(
'qemu-file.c', 'qemu-file.c',
'yank_functions.c', 'yank_functions.c',
) )
softmmu_ss.add(migration_files)
softmmu_ss.add(files( softmmu_ss.add(files(
'block-dirty-bitmap.c', 'block-dirty-bitmap.c',

View File

@ -90,34 +90,6 @@
#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200
/* We can't use any flag that is bigger than 0x200 */ /* We can't use any flag that is bigger than 0x200 */
int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
uint8_t *, int) = xbzrle_encode_buffer;
#if defined(CONFIG_AVX512BW_OPT)
#include "qemu/cpuid.h"
static void __attribute__((constructor)) init_cpu_flag(void)
{
unsigned max = __get_cpuid_max(0, NULL);
int a, b, c, d;
if (max >= 1) {
__cpuid(1, a, b, c, d);
/* We must check that AVX is not just available, but usable. */
if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
int bv;
__asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
__cpuid_count(7, 0, a, b, c, d);
/* 0xe6:
* XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
* and ZMM16-ZMM31 state are enabled by OS)
* XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
*/
if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
}
}
}
}
#endif
XBZRLECacheStats xbzrle_counters; XBZRLECacheStats xbzrle_counters;
/* used by the search for pages to send */ /* used by the search for pages to send */
@ -660,9 +632,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
/* XBZRLE encoding (if there is no overflow) */ /* XBZRLE encoding (if there is no overflow) */
encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf, encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
TARGET_PAGE_SIZE, XBZRLE.encoded_buf, TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
TARGET_PAGE_SIZE); TARGET_PAGE_SIZE);
/* /*
* Update the cache contents, so that it corresponds to the data * Update the cache contents, so that it corresponds to the data

View File

@ -15,6 +15,152 @@
#include "qemu/host-utils.h" #include "qemu/host-utils.h"
#include "xbzrle.h" #include "xbzrle.h"
#if defined(CONFIG_AVX512BW_OPT)
#include <immintrin.h>
#include "host/cpuinfo.h"
static int __attribute__((target("avx512bw")))
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen)
{
uint32_t zrun_len = 0, nzrun_len = 0;
int d = 0, i = 0, num = 0;
uint8_t *nzrun_start = NULL;
/* add 1 to include residual part in main loop */
uint32_t count512s = (slen >> 6) + 1;
/* countResidual is tail of data, i.e., countResidual = slen % 64 */
uint32_t count_residual = slen & 0b111111;
bool never_same = true;
uint64_t mask_residual = 1;
mask_residual <<= count_residual;
mask_residual -= 1;
__m512i r = _mm512_set1_epi32(0);
while (count512s) {
int bytes_to_check = 64;
uint64_t mask = 0xffffffffffffffff;
if (count512s == 1) {
bytes_to_check = count_residual;
mask = mask_residual;
}
__m512i old_data = _mm512_mask_loadu_epi8(r,
mask, old_buf + i);
__m512i new_data = _mm512_mask_loadu_epi8(r,
mask, new_buf + i);
uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
count512s--;
bool is_same = (comp & 0x1);
while (bytes_to_check) {
if (d + 2 > dlen) {
return -1;
}
if (is_same) {
if (nzrun_len) {
d += uleb128_encode_small(dst + d, nzrun_len);
if (d + nzrun_len > dlen) {
return -1;
}
nzrun_start = new_buf + i - nzrun_len;
memcpy(dst + d, nzrun_start, nzrun_len);
d += nzrun_len;
nzrun_len = 0;
}
/* 64 data at a time for speed */
if (count512s && (comp == 0xffffffffffffffff)) {
i += 64;
zrun_len += 64;
break;
}
never_same = false;
num = ctz64(~comp);
num = (num < bytes_to_check) ? num : bytes_to_check;
zrun_len += num;
bytes_to_check -= num;
comp >>= num;
i += num;
if (bytes_to_check) {
/* still has different data after same data */
d += uleb128_encode_small(dst + d, zrun_len);
zrun_len = 0;
} else {
break;
}
}
if (never_same || zrun_len) {
/*
* never_same only acts if
* data begins with diff in first count512s
*/
d += uleb128_encode_small(dst + d, zrun_len);
zrun_len = 0;
never_same = false;
}
/* has diff, 64 data at a time for speed */
if ((bytes_to_check == 64) && (comp == 0x0)) {
i += 64;
nzrun_len += 64;
break;
}
num = ctz64(comp);
num = (num < bytes_to_check) ? num : bytes_to_check;
nzrun_len += num;
bytes_to_check -= num;
comp >>= num;
i += num;
if (bytes_to_check) {
/* mask like 111000 */
d += uleb128_encode_small(dst + d, nzrun_len);
/* overflow */
if (d + nzrun_len > dlen) {
return -1;
}
nzrun_start = new_buf + i - nzrun_len;
memcpy(dst + d, nzrun_start, nzrun_len);
d += nzrun_len;
nzrun_len = 0;
is_same = true;
}
}
}
if (nzrun_len != 0) {
d += uleb128_encode_small(dst + d, nzrun_len);
/* overflow */
if (d + nzrun_len > dlen) {
return -1;
}
nzrun_start = new_buf + i - nzrun_len;
memcpy(dst + d, nzrun_start, nzrun_len);
d += nzrun_len;
}
return d;
}
static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
int slen, uint8_t *dst, int dlen);
static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
static void __attribute__((constructor)) init_accel(void)
{
unsigned info = cpuinfo_init();
if (info & CPUINFO_AVX512BW) {
accel_func = xbzrle_encode_buffer_avx512;
} else {
accel_func = xbzrle_encode_buffer_int;
}
}
int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen)
{
return accel_func(old_buf, new_buf, slen, dst, dlen);
}
#define xbzrle_encode_buffer xbzrle_encode_buffer_int
#endif
/* /*
page = zrun nzrun page = zrun nzrun
| zrun nzrun page | zrun nzrun page
@ -175,125 +321,3 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
return d; return d;
} }
#if defined(CONFIG_AVX512BW_OPT)
#include <immintrin.h>
int __attribute__((target("avx512bw")))
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen)
{
uint32_t zrun_len = 0, nzrun_len = 0;
int d = 0, i = 0, num = 0;
uint8_t *nzrun_start = NULL;
/* add 1 to include residual part in main loop */
uint32_t count512s = (slen >> 6) + 1;
/* countResidual is tail of data, i.e., countResidual = slen % 64 */
uint32_t count_residual = slen & 0b111111;
bool never_same = true;
uint64_t mask_residual = 1;
mask_residual <<= count_residual;
mask_residual -= 1;
__m512i r = _mm512_set1_epi32(0);
while (count512s) {
int bytes_to_check = 64;
uint64_t mask = 0xffffffffffffffff;
if (count512s == 1) {
bytes_to_check = count_residual;
mask = mask_residual;
}
__m512i old_data = _mm512_mask_loadu_epi8(r,
mask, old_buf + i);
__m512i new_data = _mm512_mask_loadu_epi8(r,
mask, new_buf + i);
uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
count512s--;
bool is_same = (comp & 0x1);
while (bytes_to_check) {
if (d + 2 > dlen) {
return -1;
}
if (is_same) {
if (nzrun_len) {
d += uleb128_encode_small(dst + d, nzrun_len);
if (d + nzrun_len > dlen) {
return -1;
}
nzrun_start = new_buf + i - nzrun_len;
memcpy(dst + d, nzrun_start, nzrun_len);
d += nzrun_len;
nzrun_len = 0;
}
/* 64 data at a time for speed */
if (count512s && (comp == 0xffffffffffffffff)) {
i += 64;
zrun_len += 64;
break;
}
never_same = false;
num = ctz64(~comp);
num = (num < bytes_to_check) ? num : bytes_to_check;
zrun_len += num;
bytes_to_check -= num;
comp >>= num;
i += num;
if (bytes_to_check) {
/* still has different data after same data */
d += uleb128_encode_small(dst + d, zrun_len);
zrun_len = 0;
} else {
break;
}
}
if (never_same || zrun_len) {
/*
* never_same only acts if
* data begins with diff in first count512s
*/
d += uleb128_encode_small(dst + d, zrun_len);
zrun_len = 0;
never_same = false;
}
/* has diff, 64 data at a time for speed */
if ((bytes_to_check == 64) && (comp == 0x0)) {
i += 64;
nzrun_len += 64;
break;
}
num = ctz64(comp);
num = (num < bytes_to_check) ? num : bytes_to_check;
nzrun_len += num;
bytes_to_check -= num;
comp >>= num;
i += num;
if (bytes_to_check) {
/* mask like 111000 */
d += uleb128_encode_small(dst + d, nzrun_len);
/* overflow */
if (d + nzrun_len > dlen) {
return -1;
}
nzrun_start = new_buf + i - nzrun_len;
memcpy(dst + d, nzrun_start, nzrun_len);
d += nzrun_len;
nzrun_len = 0;
is_same = true;
}
}
}
if (nzrun_len != 0) {
d += uleb128_encode_small(dst + d, nzrun_len);
/* overflow */
if (d + nzrun_len > dlen) {
return -1;
}
nzrun_start = new_buf + i - nzrun_len;
memcpy(dst + d, nzrun_start, nzrun_len);
d += nzrun_len;
}
return d;
}
#endif
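
A usage sketch of the public XBZRLE API after this change (the xbzrle_roundtrip wrapper and buffer sizes are assumptions): callers keep using the plain xbzrle_encode_buffer entry point, and the constructor added above has already routed it to the AVX512BW or the integer implementation.

    /* Hypothetical sketch: encode the delta between two pages, then apply
     * it to a copy of the old page to recover the new one. */
    static int xbzrle_roundtrip(uint8_t *old_page, uint8_t *new_page,
                                uint8_t *scratch, uint8_t *out, int page_size)
    {
        int elen = xbzrle_encode_buffer(old_page, new_page, page_size,
                                        scratch, page_size);
        if (elen < 0) {
            return -1;                      /* encoded form would not fit */
        }
        memcpy(out, old_page, page_size);   /* decode patches the old copy */
        return xbzrle_decode_buffer(scratch, elen, out, page_size);
    }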

View File

@ -18,8 +18,5 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen); uint8_t *dst, int dlen);
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
#if defined(CONFIG_AVX512BW_OPT)
int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen);
#endif
#endif #endif

View File

@ -1937,8 +1937,8 @@ static bool do_v7m_function_return(ARMCPU *cpu)
*/ */
mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true); mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx)); oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx));
newpc = cpu_ldl_le_mmu(env, frameptr, oi, 0); newpc = cpu_ldl_mmu(env, frameptr, oi, 0);
newpsr = cpu_ldl_le_mmu(env, frameptr + 4, oi, 0); newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0);
/* Consistency checks on new IPSR */ /* Consistency checks on new IPSR */
newpsr_exc = newpsr & XPSR_EXCP; newpsr_exc = newpsr & XPSR_EXCP;

View File

@ -1124,7 +1124,6 @@ struct CPUArchState {
/* used to speed-up TLB assist handlers */ /* used to speed-up TLB assist handlers */
target_ulong nip; /* next instruction pointer */ target_ulong nip; /* next instruction pointer */
uint64_t retxh; /* high part of 128-bit helper return */
/* when a memory exception occurs, the access type is stored here */ /* when a memory exception occurs, the access type is stored here */
int access_type; int access_type;

View File

@ -810,12 +810,3 @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
DEF_HELPER_1(tbegin, void, env) DEF_HELPER_1(tbegin, void, env)
DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
#ifdef TARGET_PPC64
DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
void, env, tl, i64, i64, i32)
DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
void, env, tl, i64, i64, i32)
#endif

View File

@ -367,54 +367,6 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
return i; return i;
} }
#ifdef TARGET_PPC64
uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
uint32_t opidx)
{
Int128 ret;
/* We will have raised EXCP_ATOMIC from the translator. */
assert(HAVE_ATOMIC128);
ret = cpu_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
env->retxh = int128_gethi(ret);
return int128_getlo(ret);
}
uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
uint32_t opidx)
{
Int128 ret;
/* We will have raised EXCP_ATOMIC from the translator. */
assert(HAVE_ATOMIC128);
ret = cpu_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
env->retxh = int128_gethi(ret);
return int128_getlo(ret);
}
void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
uint64_t lo, uint64_t hi, uint32_t opidx)
{
Int128 val;
/* We will have raised EXCP_ATOMIC from the translator. */
assert(HAVE_ATOMIC128);
val = int128_make128(lo, hi);
cpu_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
}
void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
uint64_t lo, uint64_t hi, uint32_t opidx)
{
Int128 val;
/* We will have raised EXCP_ATOMIC from the translator. */
assert(HAVE_ATOMIC128);
val = int128_make128(lo, hi);
cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
}
#endif
/*****************************************************************************/ /*****************************************************************************/
/* Altivec extension helpers */ /* Altivec extension helpers */
#if HOST_BIG_ENDIAN #if HOST_BIG_ENDIAN

View File

@ -3757,6 +3757,7 @@ static void gen_lqarx(DisasContext *ctx)
{ {
int rd = rD(ctx->opcode); int rd = rD(ctx->opcode);
TCGv EA, hi, lo; TCGv EA, hi, lo;
TCGv_i128 t16;
if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) || if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
(rd == rB(ctx->opcode)))) { (rd == rB(ctx->opcode)))) {
@ -3772,36 +3773,9 @@ static void gen_lqarx(DisasContext *ctx)
lo = cpu_gpr[rd + 1]; lo = cpu_gpr[rd + 1];
hi = cpu_gpr[rd]; hi = cpu_gpr[rd];
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { t16 = tcg_temp_new_i128();
if (HAVE_ATOMIC128) { tcg_gen_qemu_ld_i128(t16, EA, ctx->mem_idx, DEF_MEMOP(MO_128 | MO_ALIGN));
TCGv_i32 oi = tcg_temp_new_i32(); tcg_gen_extr_i128_i64(lo, hi, t16);
if (ctx->le_mode) {
tcg_gen_movi_i32(oi, make_memop_idx(MO_LE | MO_128 | MO_ALIGN,
ctx->mem_idx));
gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
} else {
tcg_gen_movi_i32(oi, make_memop_idx(MO_BE | MO_128 | MO_ALIGN,
ctx->mem_idx));
gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
}
tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
} else {
/* Restart with exclusive lock. */
gen_helper_exit_atomic(cpu_env);
ctx->base.is_jmp = DISAS_NORETURN;
return;
}
} else if (ctx->le_mode) {
tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEUQ | MO_ALIGN_16);
tcg_gen_mov_tl(cpu_reserve, EA);
gen_addr_add(ctx, EA, EA, 8);
tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEUQ);
} else {
tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEUQ | MO_ALIGN_16);
tcg_gen_mov_tl(cpu_reserve, EA);
gen_addr_add(ctx, EA, EA, 8);
tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEUQ);
}
tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val)); tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));
tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2)); tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));

View File

@ -72,7 +72,7 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
#if defined(TARGET_PPC64) #if defined(TARGET_PPC64)
TCGv ea; TCGv ea;
TCGv_i64 low_addr_gpr, high_addr_gpr; TCGv_i64 low_addr_gpr, high_addr_gpr;
MemOp mop; TCGv_i128 t16;
REQUIRE_INSNS_FLAGS(ctx, 64BX); REQUIRE_INSNS_FLAGS(ctx, 64BX);
@ -101,51 +101,14 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
low_addr_gpr = cpu_gpr[a->rt + 1]; low_addr_gpr = cpu_gpr[a->rt + 1];
high_addr_gpr = cpu_gpr[a->rt]; high_addr_gpr = cpu_gpr[a->rt];
} }
t16 = tcg_temp_new_i128();
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { if (store) {
if (HAVE_ATOMIC128) { tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr);
mop = DEF_MEMOP(MO_128); tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
TCGv_i32 oi = tcg_constant_i32(make_memop_idx(mop, ctx->mem_idx));
if (store) {
if (ctx->le_mode) {
gen_helper_stq_le_parallel(cpu_env, ea, low_addr_gpr,
high_addr_gpr, oi);
} else {
gen_helper_stq_be_parallel(cpu_env, ea, high_addr_gpr,
low_addr_gpr, oi);
}
} else {
if (ctx->le_mode) {
gen_helper_lq_le_parallel(low_addr_gpr, cpu_env, ea, oi);
tcg_gen_ld_i64(high_addr_gpr, cpu_env,
offsetof(CPUPPCState, retxh));
} else {
gen_helper_lq_be_parallel(high_addr_gpr, cpu_env, ea, oi);
tcg_gen_ld_i64(low_addr_gpr, cpu_env,
offsetof(CPUPPCState, retxh));
}
}
} else {
/* Restart with exclusive lock. */
gen_helper_exit_atomic(cpu_env);
ctx->base.is_jmp = DISAS_NORETURN;
}
} else { } else {
mop = DEF_MEMOP(MO_UQ); tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
if (store) { tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16);
tcg_gen_qemu_st_i64(low_addr_gpr, ea, ctx->mem_idx, mop);
} else {
tcg_gen_qemu_ld_i64(low_addr_gpr, ea, ctx->mem_idx, mop);
}
gen_addr_add(ctx, ea, ea, 8);
if (store) {
tcg_gen_qemu_st_i64(high_addr_gpr, ea, ctx->mem_idx, mop);
} else {
tcg_gen_qemu_ld_i64(high_addr_gpr, ea, ctx->mem_idx, mop);
}
} }
#else #else
qemu_build_not_reached(); qemu_build_not_reached();

View File

@ -76,9 +76,6 @@ struct CPUArchState {
float_status fpu_status; /* passed to softfloat lib */ float_status fpu_status; /* passed to softfloat lib */
/* The low part of a 128-bit return, or remainder of a divide. */
uint64_t retxl;
PSW psw; PSW psw;
S390CrashReason crash_reason; S390CrashReason crash_reason;

View File

@ -108,10 +108,6 @@ DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
DEF_HELPER_FLAGS_2(srnm, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(srnm, TCG_CALL_NO_WG, void, env, i64)
DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_2(stfle, i32, env, i64) DEF_HELPER_2(stfle, i32, env, i64)
DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
DEF_HELPER_FLAGS_2(lpq_parallel, TCG_CALL_NO_WG, i64, env, i64)
DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(stpq_parallel, TCG_CALL_NO_WG, void, env, i64, i64, i64)
DEF_HELPER_4(mvcos, i32, env, i64, i64, i64) DEF_HELPER_4(mvcos, i32, env, i64, i64, i64)
DEF_HELPER_4(cu12, i32, env, i32, i32, i32) DEF_HELPER_4(cu12, i32, env, i32, i32, i32)
DEF_HELPER_4(cu14, i32, env, i32, i32, i32) DEF_HELPER_4(cu14, i32, env, i32, i32, i32)

View File

@ -570,7 +570,7 @@
D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL) D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEUQ) D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEUQ)
/* LOAD PAIR FROM QUADWORD */ /* LOAD PAIR FROM QUADWORD */
C(0xe38f, LPQ, RXY_a, Z, 0, a2, r1_P, 0, lpq, 0) C(0xe38f, LPQ, RXY_a, Z, 0, a2, 0, r1_D64, lpq, 0)
/* LOAD POSITIVE */ /* LOAD POSITIVE */
C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32) C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32)
C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64) C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64)

View File

@ -1737,6 +1737,11 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
uint64_t a2, bool parallel) uint64_t a2, bool parallel)
{ {
uint32_t mem_idx = cpu_mmu_index(env, false); uint32_t mem_idx = cpu_mmu_index(env, false);
MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
uintptr_t ra = GETPC(); uintptr_t ra = GETPC();
uint32_t fc = extract32(env->regs[0], 0, 8); uint32_t fc = extract32(env->regs[0], 0, 8);
uint32_t sc = extract32(env->regs[0], 8, 8); uint32_t sc = extract32(env->regs[0], 8, 8);
@ -1775,34 +1780,30 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
max = 3; max = 3;
#endif #endif
if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
(HAVE_ATOMIC128 ? 0 : sc > max)) { (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
cpu_loop_exit_atomic(env_cpu(env), ra); cpu_loop_exit_atomic(env_cpu(env), ra);
} }
} }
/* All loads happen before all stores. For simplicity, load the entire /*
store value area from the parameter list. */ * All loads happen before all stores. For simplicity, load the entire
svh = cpu_ldq_data_ra(env, pl + 16, ra); * store value area from the parameter list.
svl = cpu_ldq_data_ra(env, pl + 24, ra); */
svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
switch (fc) { switch (fc) {
case 0: case 0:
{ {
uint32_t nv = cpu_ldl_data_ra(env, pl, ra); uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
uint32_t cv = env->regs[r3]; uint32_t cv = env->regs[r3];
uint32_t ov; uint32_t ov;
if (parallel) { if (parallel) {
#ifdef CONFIG_USER_ONLY ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
uint32_t *haddr = g2h(env_cpu(env), a1);
ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
#else
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
} else { } else {
ov = cpu_ldl_data_ra(env, a1, ra); ov = cpu_ldl_mmu(env, a1, oi4, ra);
cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra); cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
} }
cc = (ov != cv); cc = (ov != cv);
env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov); env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
@ -1811,21 +1812,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
case 1: case 1:
{ {
uint64_t nv = cpu_ldq_data_ra(env, pl, ra); uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
uint64_t cv = env->regs[r3]; uint64_t cv = env->regs[r3];
uint64_t ov; uint64_t ov;
if (parallel) { if (parallel) {
#ifdef CONFIG_ATOMIC64 #ifdef CONFIG_ATOMIC64
MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx); ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
#else #else
/* Note that we asserted !parallel above. */ /* Note that we asserted !parallel above. */
g_assert_not_reached(); g_assert_not_reached();
#endif #endif
} else { } else {
ov = cpu_ldq_data_ra(env, a1, ra); ov = cpu_ldq_mmu(env, a1, oi8, ra);
cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra); cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
} }
cc = (ov != cv); cc = (ov != cv);
env->regs[r3] = ov; env->regs[r3] = ov;
@ -1834,27 +1834,19 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
case 2: case 2:
{ {
uint64_t nvh = cpu_ldq_data_ra(env, pl, ra); Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
Int128 nv = int128_make128(nvl, nvh);
Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
Int128 ov; Int128 ov;
if (!parallel) { if (!parallel) {
uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); ov = cpu_ld16_mmu(env, a1, oi16, ra);
uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
ov = int128_make128(ol, oh);
cc = !int128_eq(ov, cv); cc = !int128_eq(ov, cv);
if (cc) { if (cc) {
nv = ov; nv = ov;
} }
cpu_st16_mmu(env, a1, nv, oi16, ra);
cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
} else if (HAVE_CMPXCHG128) { } else if (HAVE_CMPXCHG128) {
MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx); ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
cc = !int128_eq(ov, cv); cc = !int128_eq(ov, cv);
} else { } else {
/* Note that we asserted !parallel above. */ /* Note that we asserted !parallel above. */
@ -1876,29 +1868,19 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
if (cc == 0) { if (cc == 0) {
switch (sc) { switch (sc) {
case 0: case 0:
cpu_stb_data_ra(env, a2, svh >> 56, ra); cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
break; break;
case 1: case 1:
cpu_stw_data_ra(env, a2, svh >> 48, ra); cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
break; break;
case 2: case 2:
cpu_stl_data_ra(env, a2, svh >> 32, ra); cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
break; break;
case 3: case 3:
cpu_stq_data_ra(env, a2, svh, ra); cpu_stq_mmu(env, a2, svh, oi8, ra);
break; break;
case 4: case 4:
if (!parallel) { cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
cpu_stq_data_ra(env, a2 + 0, svh, ra);
cpu_stq_data_ra(env, a2 + 8, svl, ra);
} else if (HAVE_ATOMIC128) {
MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
Int128 sv = int128_make128(svl, svh);
cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
} else {
/* Note that we asserted !parallel above. */
g_assert_not_reached();
}
break; break;
default: default:
g_assert_not_reached(); g_assert_not_reached();
@ -2398,67 +2380,6 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
} }
#endif #endif
/* load pair from quadword */
uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
{
uintptr_t ra = GETPC();
uint64_t hi, lo;
check_alignment(env, addr, 16, ra);
hi = cpu_ldq_data_ra(env, addr + 0, ra);
lo = cpu_ldq_data_ra(env, addr + 8, ra);
env->retxl = lo;
return hi;
}
uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
{
uintptr_t ra = GETPC();
uint64_t hi, lo;
int mem_idx;
MemOpIdx oi;
Int128 v;
assert(HAVE_ATOMIC128);
mem_idx = cpu_mmu_index(env, false);
oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
hi = int128_gethi(v);
lo = int128_getlo(v);
env->retxl = lo;
return hi;
}
/* store pair to quadword */
void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
uint64_t low, uint64_t high)
{
uintptr_t ra = GETPC();
check_alignment(env, addr, 16, ra);
cpu_stq_data_ra(env, addr + 0, high, ra);
cpu_stq_data_ra(env, addr + 8, low, ra);
}
void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
uint64_t low, uint64_t high)
{
uintptr_t ra = GETPC();
int mem_idx;
MemOpIdx oi;
Int128 v;
assert(HAVE_ATOMIC128);
mem_idx = cpu_mmu_index(env, false);
oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
v = int128_make128(low, high);
cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
}
/* Execute instruction. This instruction executes an insn modified with /* Execute instruction. This instruction executes an insn modified with
the contents of r1. It does not change the executed instruction in memory; the contents of r1. It does not change the executed instruction in memory;
it does not change the program counter. it does not change the program counter.

View File

@ -335,11 +335,6 @@ static void store_freg32_i64(int reg, TCGv_i64 v)
tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg)); tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg));
} }
static void return_low128(TCGv_i64 dest)
{
tcg_gen_ld_i64(dest, cpu_env, offsetof(CPUS390XState, retxl));
}
static void update_psw_addr(DisasContext *s) static void update_psw_addr(DisasContext *s)
{ {
/* psw.addr */ /* psw.addr */
@ -3130,15 +3125,9 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
{ {
if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { o->out_128 = tcg_temp_new_i128();
gen_helper_lpq(o->out, cpu_env, o->in2); tcg_gen_qemu_ld_i128(o->out_128, o->in2, get_mem_index(s),
} else if (HAVE_ATOMIC128) { MO_TE | MO_128 | MO_ALIGN);
gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
} else {
gen_helper_exit_atomic(cpu_env);
return DISAS_NORETURN;
}
return_low128(o->out2);
return DISAS_NEXT; return DISAS_NEXT;
} }
@ -4533,14 +4522,11 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
{ {
if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { TCGv_i128 t16 = tcg_temp_new_i128();
gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
} else if (HAVE_ATOMIC128) { tcg_gen_concat_i64_i128(t16, o->out2, o->out);
gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); tcg_gen_qemu_st_i128(t16, o->in2, get_mem_index(s),
} else { MO_TE | MO_128 | MO_ALIGN);
gen_helper_exit_atomic(cpu_env);
return DISAS_NORETURN;
}
return DISAS_NEXT; return DISAS_NEXT;
} }
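
A sketch of the i128 load pattern now shared by the s390x and ppc front ends (the gen_ld16_pair wrapper is illustrative): a single guest access through tcg_gen_qemu_ld_i128, split into two 64-bit halves with the extr helper.

    /* Hypothetical sketch: load an aligned 16-byte quantity as a hi/lo pair. */
    static void gen_ld16_pair(TCGv_i64 lo, TCGv_i64 hi, TCGv addr, int mem_idx)
    {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, addr, mem_idx, MO_TE | MO_128 | MO_ALIGN);
        tcg_gen_extr_i128_i64(lo, hi, t16);
    }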

View File

@ -17,8 +17,6 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>. * License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/ */
#define DEBUG_DISAS
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "cpu.h" #include "cpu.h"
#include "disas/disas.h" #include "disas/disas.h"

View File

@ -1334,25 +1334,13 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
ret = cpu_ldb_mmu(env, addr, oi, GETPC()); ret = cpu_ldb_mmu(env, addr, oi, GETPC());
break; break;
case 2: case 2:
if (asi & 8) { ret = cpu_ldw_mmu(env, addr, oi, GETPC());
ret = cpu_ldw_le_mmu(env, addr, oi, GETPC());
} else {
ret = cpu_ldw_be_mmu(env, addr, oi, GETPC());
}
break; break;
case 4: case 4:
if (asi & 8) { ret = cpu_ldl_mmu(env, addr, oi, GETPC());
ret = cpu_ldl_le_mmu(env, addr, oi, GETPC());
} else {
ret = cpu_ldl_be_mmu(env, addr, oi, GETPC());
}
break; break;
case 8: case 8:
if (asi & 8) { ret = cpu_ldq_mmu(env, addr, oi, GETPC());
ret = cpu_ldq_le_mmu(env, addr, oi, GETPC());
} else {
ret = cpu_ldq_be_mmu(env, addr, oi, GETPC());
}
break; break;
default: default:
g_assert_not_reached(); g_assert_not_reached();

View File

@ -34,8 +34,6 @@
#include "asi.h" #include "asi.h"
#define DEBUG_DISAS
#define DYNAMIC_PC 1 /* dynamic pc value */ #define DYNAMIC_PC 1 /* dynamic pc value */
#define JUMP_PC 2 /* dynamic pc value which takes only two values #define JUMP_PC 2 /* dynamic pc value which takes only two values
according to jump_pc[T2] */ according to jump_pc[T2] */

View File

@ -13,12 +13,6 @@
#include "../tcg-ldst.c.inc" #include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc" #include "../tcg-pool.c.inc"
#include "qemu/bitops.h" #include "qemu/bitops.h"
#ifdef __linux__
#include <asm/hwcap.h>
#endif
#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
#endif
/* We're going to re-use TCGType in setting of the SF bit, which controls /* We're going to re-use TCGType in setting of the SF bit, which controls
the size of the operation performed. If we know the values match, it the size of the operation performed. If we know the values match, it
@ -77,9 +71,6 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
return TCG_REG_X0 + slot; return TCG_REG_X0 + slot;
} }
bool have_lse;
bool have_lse2;
#define TCG_REG_TMP TCG_REG_X30 #define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31 #define TCG_VEC_TMP TCG_REG_V31
@ -2878,39 +2869,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
} }
} }
#ifdef CONFIG_DARWIN
static bool sysctl_for_bool(const char *name)
{
int val = 0;
size_t len = sizeof(val);
if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
return val != 0;
}
/*
* We might in the future ask for properties not present in older kernels,
* but we're only asking about static properties, all of which should be
* 'int'. So we shouln't see ENOMEM (val too small), or any of the other
* more exotic errors.
*/
assert(errno == ENOENT);
return false;
}
#endif
static void tcg_target_init(TCGContext *s) static void tcg_target_init(TCGContext *s)
{ {
#ifdef __linux__
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
have_lse = hwcap & HWCAP_ATOMICS;
have_lse2 = hwcap & HWCAP_USCAT;
#endif
#ifdef CONFIG_DARWIN
have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
#endif
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;

View File

@ -13,6 +13,8 @@
#ifndef AARCH64_TCG_TARGET_H #ifndef AARCH64_TCG_TARGET_H
#define AARCH64_TCG_TARGET_H #define AARCH64_TCG_TARGET_H
#include "host/cpuinfo.h"
#define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
@ -57,8 +59,8 @@ typedef enum {
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN #define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL #define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
extern bool have_lse; #define have_lse (cpuinfo & CPUINFO_LSE)
extern bool have_lse2; #define have_lse2 (cpuinfo & CPUINFO_LSE2)
/* optional instructions */ /* optional instructions */
#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_div_i32 1

View File

@ -158,42 +158,14 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
# define SOFTMMU_RESERVE_REGS 0 # define SOFTMMU_RESERVE_REGS 0
#endif #endif
/* The host compiler should supply <cpuid.h> to enable runtime features
detection, as we're not going to go so far as our own inline assembly.
If not available, default values will be assumed. */
#if defined(CONFIG_CPUID_H)
#include "qemu/cpuid.h"
#endif
/* For 64-bit, we always know that CMOV is available. */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
# define have_cmov true
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;
#else
# define have_cmov 0
# define have_cmov (cpuinfo & CPUINFO_CMOV)
#endif
/* We need these symbols in tcg-target.h, and we can't properly conditionalize
it there. Therefore we always define the variable. */
bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;
bool have_avx512bw;
bool have_avx512dq;
bool have_avx512vbmi2;
bool have_avx512vl;
bool have_movbe;
bool have_atomic16;
#ifdef CONFIG_CPUID_H
static bool have_bmi2;
static bool have_lzcnt;
#else
# define have_bmi2 0
# define have_lzcnt 0
#endif
#define have_bmi2 (cpuinfo & CPUINFO_BMI2)
#define have_lzcnt (cpuinfo & CPUINFO_LZCNT)
static const tcg_insn_unit *tb_ret_addr;
@ -3961,93 +3933,6 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
unsigned a, b, c, d, b7 = 0, c7 = 0;
unsigned max = __get_cpuid_max(0, 0);
if (max >= 7) {
/* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
__cpuid_count(7, 0, a, b7, c7, d);
have_bmi1 = (b7 & bit_BMI) != 0;
have_bmi2 = (b7 & bit_BMI2) != 0;
}
if (max >= 1) {
__cpuid(1, a, b, c, d);
#ifndef have_cmov
/* For 32-bit, 99% certainty that we're running on hardware that
supports cmov, but we still need to check. In case cmov is not
available, we'll use a small forward branch. */
have_cmov = (d & bit_CMOV) != 0;
#endif
/* MOVBE is only available on Intel Atom and Haswell CPUs, so we
need to probe for it. */
have_movbe = (c & bit_MOVBE) != 0;
have_popcnt = (c & bit_POPCNT) != 0;
/* There are a number of things we must check before we can be
sure of not hitting invalid opcode. */
if (c & bit_OSXSAVE) {
unsigned bv = xgetbv_low(0);
if ((bv & 6) == 6) {
have_avx1 = (c & bit_AVX) != 0;
have_avx2 = (b7 & bit_AVX2) != 0;
/*
* There are interesting instructions in AVX512, so long
* as we have AVX512VL, which indicates support for EVEX
* on sizes smaller than 512 bits. We are required to
* check that OPMASK and all extended ZMM state are enabled
* even if we're not using them -- the insns will fault.
*/
if ((bv & 0xe0) == 0xe0
&& (b7 & bit_AVX512F)
&& (b7 & bit_AVX512VL)) {
have_avx512vl = true;
have_avx512bw = (b7 & bit_AVX512BW) != 0;
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
}
/*
* The Intel SDM has added:
* Processors that enumerate support for Intel® AVX
* (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
* guarantee that the 16-byte memory operations performed
* by the following instructions will always be carried
* out atomically:
* - MOVAPD, MOVAPS, and MOVDQA.
* - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
* - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
* with EVEX.128 and k0 (masking disabled).
* Note that these instructions require the linear addresses
* of their memory operands to be 16-byte aligned.
*
* AMD has provided an even stronger guarantee that processors
* with AVX provide 16-byte atomicity for all cachable,
* naturally aligned single loads and stores, e.g. MOVDQU.
*
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
*/
if (have_avx1) {
__cpuid(0, a, b, c, d);
have_atomic16 = (c == signature_INTEL_ecx ||
c == signature_AMD_ecx);
}
}
}
}
max = __get_cpuid_max(0x8000000, 0);
if (max >= 1) {
__cpuid(0x80000001, a, b, c, d);
/* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
have_lzcnt = (c & bit_LZCNT) != 0;
}
#endif /* CONFIG_CPUID_H */
    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

tcg/i386/tcg-target.h

@ -25,6 +25,8 @@
#ifndef I386_TCG_TARGET_H
#define I386_TCG_TARGET_H
#include "host/cpuinfo.h"
#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
@ -111,16 +113,22 @@ typedef enum {
# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
#endif
extern bool have_bmi1;
extern bool have_popcnt;
extern bool have_avx1;
extern bool have_avx2;
extern bool have_avx512bw;
extern bool have_avx512dq;
extern bool have_avx512vbmi2;
extern bool have_avx512vl;
extern bool have_movbe;
extern bool have_atomic16;
#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
#define have_avx2 (cpuinfo & CPUINFO_AVX2)
#define have_movbe (cpuinfo & CPUINFO_MOVBE)
#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
/*
 * There are interesting instructions in AVX512, so long as we have AVX512VL,
 * which indicates support for EVEX on sizes smaller than 512 bits.
*/
#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
(cpuinfo & CPUINFO_AVX512F))
#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
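Folding the AVX512F/AVX512VL requirement into the macros means a caller can never see have_avx512bw (or the dq/vbmi2 variants) as true on a CPU where the 128/256-bit EVEX encodings would fault. A small stand-alone illustration of that composition (bit values are stand-ins for the real cpuinfo.h definitions):

#include <stdio.h>

#define CPUINFO_AVX512F  (1u << 0)   /* stand-in values */
#define CPUINFO_AVX512VL (1u << 1)
#define CPUINFO_AVX512BW (1u << 2)

static unsigned cpuinfo;

#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && (cpuinfo & CPUINFO_AVX512F))
#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)

int main(void)
{
    cpuinfo = CPUINFO_AVX512BW;                 /* BW reported without F/VL */
    printf("avx512bw usable: %d\n", have_avx512bw ? 1 : 0);    /* prints 0 */
    cpuinfo |= CPUINFO_AVX512F | CPUINFO_AVX512VL;
    printf("avx512bw usable: %d\n", have_avx512bw ? 1 : 0);    /* prints 1 */
    return 0;
}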

tcg/tcg-op-ldst.c

@ -975,13 +975,11 @@ static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
                                  ? gen_helper_nonatomic_cmpxchgo_le
                                  : gen_helper_nonatomic_cmpxchgo_be);
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        gen_helper_nonatomic_cmpxchgo(retv, cpu_env, a64, cmpv, newv,
                                      tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();

tcg/tcg.c

@ -22,9 +22,6 @@
 * THE SOFTWARE.
 */
/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS
#include "qemu/osdep.h"
/* Define to jump the ELF file used to communicate with GDB. */
@ -1451,7 +1448,6 @@ void tcg_prologue_init(TCGContext *s)
(uintptr_t)s->code_buf, prologue_size);
#endif
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
FILE *logfile = qemu_log_trylock();
if (logfile) {
@ -1483,7 +1479,6 @@ void tcg_prologue_init(TCGContext *s)
qemu_log_unlock(logfile);
}
}
#endif
#ifndef CONFIG_TCG_INTERPRETER
/*
@ -5998,7 +5993,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
}
#endif
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
             && qemu_log_in_addr_range(pc_start))) {
FILE *logfile = qemu_log_trylock();
@ -6009,7 +6003,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
qemu_log_unlock(logfile);
}
}
#endif
#ifdef CONFIG_DEBUG_TCG
/* Ensure all labels referenced have been emitted. */
@ -6032,9 +6025,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif
#ifdef USE_TCG_OPTIMIZATIONS
tcg_optimize(s);
#endif
#ifdef CONFIG_PROFILER
qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
@ -6046,7 +6037,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
liveness_pass_1(s);
if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
             && qemu_log_in_addr_range(pc_start))) {
FILE *logfile = qemu_log_trylock();
@ -6057,7 +6047,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
qemu_log_unlock(logfile);
}
}
#endif
/* Replace indirect temps with direct temps. */
if (liveness_pass_2(s)) {
/* If changes were made, re-run liveness. */
@ -6069,7 +6059,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
             && qemu_log_in_addr_range(pc_start))) {
FILE *logfile = qemu_log_trylock();
@ -6080,7 +6069,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
qemu_log_unlock(logfile);
}
}
#endif
/* Initialize goto_tb jump offsets. */
tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
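With DEBUG_DISAS gone, these dumps are always compiled in and are gated purely by the runtime log mask, so enabling them is only a matter of the -d option. A typical invocation (log item names as listed by -d help; the binary and remaining options depend on your setup):

    qemu-system-aarch64 -d op,op_opt,op_ind,out_asm -D tcg.log ...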

tests/bench/meson.build

@ -3,12 +3,6 @@ qht_bench = executable('qht-bench',
                         sources: 'qht-bench.c',
                         dependencies: [qemuutil])
if have_system
xbzrle_bench = executable('xbzrle-bench',
sources: 'xbzrle-bench.c',
dependencies: [qemuutil,migration])
endif
qtree_bench = executable('qtree-bench',
                         sources: 'qtree-bench.c',
                         dependencies: [qemuutil])

tests/bench/xbzrle-bench.c (deleted file)

@ -1,469 +0,0 @@
/*
* Xor Based Zero Run Length Encoding unit tests.
*
* Copyright 2013 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Orit Wasserman <owasserm@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "../migration/xbzrle.h"
#if defined(CONFIG_AVX512BW_OPT)
#define XBZRLE_PAGE_SIZE 4096
static bool is_cpu_support_avx512bw;
#include "qemu/cpuid.h"
static void __attribute__((constructor)) init_cpu_flag(void)
{
unsigned max = __get_cpuid_max(0, NULL);
int a, b, c, d;
is_cpu_support_avx512bw = false;
if (max >= 1) {
__cpuid(1, a, b, c, d);
/* We must check that AVX is not just available, but usable. */
if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
int bv;
__asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
__cpuid_count(7, 0, a, b, c, d);
/* 0xe6:
* XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
* and ZMM16-ZMM31 state are enabled by OS)
* XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
*/
if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
is_cpu_support_avx512bw = true;
}
}
}
return ;
}
struct ResTime {
float t_raw;
float t_512;
};
/* Function prototypes
int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen);
*/
static void encode_decode_zero(struct ResTime *res)
{
uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
int i = 0;
int dlen = 0, dlen512 = 0;
int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
for (i = diff_len; i > 0; i--) {
buffer[1000 + i] = i;
buffer512[1000 + i] = i;
}
buffer[1000 + diff_len + 3] = 103;
buffer[1000 + diff_len + 5] = 105;
buffer512[1000 + diff_len + 3] = 103;
buffer512[1000 + diff_len + 5] = 105;
/* encode zero page */
time_t t_start, t_end, t_start512, t_end512;
t_start = clock();
dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
t_end = clock();
float time_val = difftime(t_end, t_start);
g_assert(dlen == 0);
t_start512 = clock();
dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
compressed512, XBZRLE_PAGE_SIZE);
t_end512 = clock();
float time_val512 = difftime(t_end512, t_start512);
g_assert(dlen512 == 0);
res->t_raw = time_val;
res->t_512 = time_val512;
g_free(buffer);
g_free(compressed);
g_free(buffer512);
g_free(compressed512);
}
static void test_encode_decode_zero_avx512(void)
{
int i;
float time_raw = 0.0, time_512 = 0.0;
struct ResTime res;
for (i = 0; i < 10000; i++) {
encode_decode_zero(&res);
time_raw += res.t_raw;
time_512 += res.t_512;
}
printf("Zero test:\n");
printf("Raw xbzrle_encode time is %f ms\n", time_raw);
printf("512 xbzrle_encode time is %f ms\n", time_512);
}
static void encode_decode_unchanged(struct ResTime *res)
{
uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
int i = 0;
int dlen = 0, dlen512 = 0;
int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
for (i = diff_len; i > 0; i--) {
test[1000 + i] = i + 4;
test512[1000 + i] = i + 4;
}
test[1000 + diff_len + 3] = 107;
test[1000 + diff_len + 5] = 109;
test512[1000 + diff_len + 3] = 107;
test512[1000 + diff_len + 5] = 109;
/* test unchanged buffer */
time_t t_start, t_end, t_start512, t_end512;
t_start = clock();
dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
t_end = clock();
float time_val = difftime(t_end, t_start);
g_assert(dlen == 0);
t_start512 = clock();
dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
compressed512, XBZRLE_PAGE_SIZE);
t_end512 = clock();
float time_val512 = difftime(t_end512, t_start512);
g_assert(dlen512 == 0);
res->t_raw = time_val;
res->t_512 = time_val512;
g_free(test);
g_free(compressed);
g_free(test512);
g_free(compressed512);
}
static void test_encode_decode_unchanged_avx512(void)
{
int i;
float time_raw = 0.0, time_512 = 0.0;
struct ResTime res;
for (i = 0; i < 10000; i++) {
encode_decode_unchanged(&res);
time_raw += res.t_raw;
time_512 += res.t_512;
}
printf("Unchanged test:\n");
printf("Raw xbzrle_encode time is %f ms\n", time_raw);
printf("512 xbzrle_encode time is %f ms\n", time_512);
}
static void encode_decode_1_byte(struct ResTime *res)
{
uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
uint8_t buf[2];
uint8_t buf512[2];
test[XBZRLE_PAGE_SIZE - 1] = 1;
test512[XBZRLE_PAGE_SIZE - 1] = 1;
time_t t_start, t_end, t_start512, t_end512;
t_start = clock();
dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
t_end = clock();
float time_val = difftime(t_end, t_start);
g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
g_assert(rc == XBZRLE_PAGE_SIZE);
g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
t_start512 = clock();
dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
compressed512, XBZRLE_PAGE_SIZE);
t_end512 = clock();
float time_val512 = difftime(t_end512, t_start512);
g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
XBZRLE_PAGE_SIZE);
g_assert(rc512 == XBZRLE_PAGE_SIZE);
g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
res->t_raw = time_val;
res->t_512 = time_val512;
g_free(buffer);
g_free(compressed);
g_free(test);
g_free(buffer512);
g_free(compressed512);
g_free(test512);
}
static void test_encode_decode_1_byte_avx512(void)
{
int i;
float time_raw = 0.0, time_512 = 0.0;
struct ResTime res;
for (i = 0; i < 10000; i++) {
encode_decode_1_byte(&res);
time_raw += res.t_raw;
time_512 += res.t_512;
}
printf("1 byte test:\n");
printf("Raw xbzrle_encode time is %f ms\n", time_raw);
printf("512 xbzrle_encode time is %f ms\n", time_512);
}
static void encode_decode_overflow(struct ResTime *res)
{
uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
int i = 0, rc = 0, rc512 = 0;
for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
test[i * 2] = 1;
test512[i * 2] = 1;
}
/* encode overflow */
time_t t_start, t_end, t_start512, t_end512;
t_start = clock();
rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
t_end = clock();
float time_val = difftime(t_end, t_start);
g_assert(rc == -1);
t_start512 = clock();
rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
compressed512, XBZRLE_PAGE_SIZE);
t_end512 = clock();
float time_val512 = difftime(t_end512, t_start512);
g_assert(rc512 == -1);
res->t_raw = time_val;
res->t_512 = time_val512;
g_free(buffer);
g_free(compressed);
g_free(test);
g_free(buffer512);
g_free(compressed512);
g_free(test512);
}
static void test_encode_decode_overflow_avx512(void)
{
int i;
float time_raw = 0.0, time_512 = 0.0;
struct ResTime res;
for (i = 0; i < 10000; i++) {
encode_decode_overflow(&res);
time_raw += res.t_raw;
time_512 += res.t_512;
}
printf("Overflow test:\n");
printf("Raw xbzrle_encode time is %f ms\n", time_raw);
printf("512 xbzrle_encode time is %f ms\n", time_512);
}
static void encode_decode_range_avx512(struct ResTime *res)
{
uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
int i = 0, rc = 0, rc512 = 0;
int dlen = 0, dlen512 = 0;
int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
for (i = diff_len; i > 0; i--) {
buffer[1000 + i] = i;
test[1000 + i] = i + 4;
buffer512[1000 + i] = i;
test512[1000 + i] = i + 4;
}
buffer[1000 + diff_len + 3] = 103;
test[1000 + diff_len + 3] = 107;
buffer[1000 + diff_len + 5] = 105;
test[1000 + diff_len + 5] = 109;
buffer512[1000 + diff_len + 3] = 103;
test512[1000 + diff_len + 3] = 107;
buffer512[1000 + diff_len + 5] = 105;
test512[1000 + diff_len + 5] = 109;
/* test encode/decode */
time_t t_start, t_end, t_start512, t_end512;
t_start = clock();
dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
t_end = clock();
float time_val = difftime(t_end, t_start);
rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
g_assert(rc < XBZRLE_PAGE_SIZE);
g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
t_start512 = clock();
dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
compressed512, XBZRLE_PAGE_SIZE);
t_end512 = clock();
float time_val512 = difftime(t_end512, t_start512);
rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
g_assert(rc512 < XBZRLE_PAGE_SIZE);
g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
res->t_raw = time_val;
res->t_512 = time_val512;
g_free(buffer);
g_free(compressed);
g_free(test);
g_free(buffer512);
g_free(compressed512);
g_free(test512);
}
static void test_encode_decode_avx512(void)
{
int i;
float time_raw = 0.0, time_512 = 0.0;
struct ResTime res;
for (i = 0; i < 10000; i++) {
encode_decode_range_avx512(&res);
time_raw += res.t_raw;
time_512 += res.t_512;
}
printf("Encode decode test:\n");
printf("Raw xbzrle_encode time is %f ms\n", time_raw);
printf("512 xbzrle_encode time is %f ms\n", time_512);
}
static void encode_decode_random(struct ResTime *res)
{
uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
int i = 0, rc = 0, rc512 = 0;
int dlen = 0, dlen512 = 0;
int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
/* store the index of diff */
int dirty_index[diff_len];
for (int j = 0; j < diff_len; j++) {
dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
}
for (i = diff_len - 1; i >= 0; i--) {
buffer[dirty_index[i]] = i;
test[dirty_index[i]] = i + 4;
buffer512[dirty_index[i]] = i;
test512[dirty_index[i]] = i + 4;
}
time_t t_start, t_end, t_start512, t_end512;
t_start = clock();
dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
t_end = clock();
float time_val = difftime(t_end, t_start);
rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
g_assert(rc < XBZRLE_PAGE_SIZE);
t_start512 = clock();
dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
compressed512, XBZRLE_PAGE_SIZE);
t_end512 = clock();
float time_val512 = difftime(t_end512, t_start512);
rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
g_assert(rc512 < XBZRLE_PAGE_SIZE);
res->t_raw = time_val;
res->t_512 = time_val512;
g_free(buffer);
g_free(compressed);
g_free(test);
g_free(buffer512);
g_free(compressed512);
g_free(test512);
}
static void test_encode_decode_random_avx512(void)
{
int i;
float time_raw = 0.0, time_512 = 0.0;
struct ResTime res;
for (i = 0; i < 10000; i++) {
encode_decode_random(&res);
time_raw += res.t_raw;
time_512 += res.t_512;
}
printf("Random test:\n");
printf("Raw xbzrle_encode time is %f ms\n", time_raw);
printf("512 xbzrle_encode time is %f ms\n", time_512);
}
#endif
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
g_test_rand_int();
#if defined(CONFIG_AVX512BW_OPT)
if (likely(is_cpu_support_avx512bw)) {
g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
g_test_add_func("/xbzrle/encode_decode_unchanged",
test_encode_decode_unchanged_avx512);
g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
g_test_add_func("/xbzrle/encode_decode_overflow",
test_encode_decode_overflow_avx512);
g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
}
#endif
return g_test_run();
}

tests/unit/test-xbzrle.c

@ -16,35 +16,6 @@
#define XBZRLE_PAGE_SIZE 4096
int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
uint8_t *, int) = xbzrle_encode_buffer;
#if defined(CONFIG_AVX512BW_OPT)
#include "qemu/cpuid.h"
static void __attribute__((constructor)) init_cpu_flag(void)
{
unsigned max = __get_cpuid_max(0, NULL);
int a, b, c, d;
if (max >= 1) {
__cpuid(1, a, b, c, d);
/* We must check that AVX is not just available, but usable. */
if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
int bv;
__asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
__cpuid_count(7, 0, a, b, c, d);
/* 0xe6:
* XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
* and ZMM16-ZMM31 state are enabled by OS)
* XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
*/
if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
}
}
}
return ;
}
#endif
static void test_uleb(void)
{
    uint32_t i, val;
@ -83,8 +54,8 @@ static void test_encode_decode_zero(void)
    buffer[1000 + diff_len + 5] = 105;
    /* encode zero page */
    dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
                                     XBZRLE_PAGE_SIZE);
    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE,
                                compressed, XBZRLE_PAGE_SIZE);
    g_assert(dlen == 0);
    g_free(buffer);
@ -107,8 +78,8 @@ static void test_encode_decode_unchanged(void)
    test[1000 + diff_len + 5] = 109;
    /* test unchanged buffer */
    dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
                                     XBZRLE_PAGE_SIZE);
    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE,
                                compressed, XBZRLE_PAGE_SIZE);
    g_assert(dlen == 0);
    g_free(test);
@ -125,8 +96,8 @@ static void test_encode_decode_1_byte(void)
    test[XBZRLE_PAGE_SIZE - 1] = 1;
    dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                                     XBZRLE_PAGE_SIZE);
    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
                                compressed, XBZRLE_PAGE_SIZE);
    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
@ -150,8 +121,8 @@ static void test_encode_decode_overflow(void)
    }
    /* encode overflow */
    rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                                   XBZRLE_PAGE_SIZE);
    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
                              compressed, XBZRLE_PAGE_SIZE);
    g_assert(rc == -1);
    g_free(buffer);
@ -181,8 +152,8 @@ static void encode_decode_range(void)
    test[1000 + diff_len + 5] = 109;
    /* test encode/decode */
    dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
                                     XBZRLE_PAGE_SIZE);
    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE,
                                compressed, XBZRLE_PAGE_SIZE);
    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
    g_assert(rc < XBZRLE_PAGE_SIZE);

util/bufferiszero.c

@ -24,6 +24,7 @@
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "qemu/cutils.h" #include "qemu/cutils.h"
#include "qemu/bswap.h" #include "qemu/bswap.h"
#include "host/cpuinfo.h"
static bool
buffer_zero_int(const void *buf, size_t len)
@ -184,111 +185,75 @@ buffer_zero_avx512(const void *buf, size_t len)
}
#endif /* CONFIG_AVX512F_OPT */
/* Note that for test_buffer_is_zero_next_accel, the most preferred
 * ISA must have the least significant bit.
 */
#define CACHE_AVX512F 1
#define CACHE_AVX2    2
#define CACHE_SSE4    4
#define CACHE_SSE2    8
/* Make sure that these variables are appropriately initialized when
 * SSE2 is enabled on the compiler command-line, but the compiler is
 * too old to support CONFIG_AVX2_OPT.
 */
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
# define INIT_CACHE 0
# define INIT_ACCEL buffer_zero_int
#else
# ifndef __SSE2__
# error "ISA selection confusion"
# endif
# define INIT_CACHE CACHE_SSE2
# define INIT_ACCEL buffer_zero_sse2
#endif
static unsigned cpuid_cache = INIT_CACHE;
static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
static int length_to_accel = 64;
/*
 * Make sure that these variables are appropriately initialized when
 * SSE2 is enabled on the compiler command-line, but the compiler is
 * too old to support CONFIG_AVX2_OPT.
 */
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
# define INIT_USED     0
# define INIT_LENGTH   0
# define INIT_ACCEL    buffer_zero_int
#else
# ifndef __SSE2__
# error "ISA selection confusion"
# endif
# define INIT_USED     CPUINFO_SSE2
# define INIT_LENGTH   64
# define INIT_ACCEL    buffer_zero_sse2
#endif
static unsigned used_accel = INIT_USED;
static unsigned length_to_accel = INIT_LENGTH;
static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
static void init_accel(unsigned cache)
{
    bool (*fn)(const void *, size_t) = buffer_zero_int;
    if (cache & CACHE_SSE2) {
        fn = buffer_zero_sse2;
        length_to_accel = 64;
    }
#ifdef CONFIG_AVX2_OPT
    if (cache & CACHE_SSE4) {
        fn = buffer_zero_sse4;
        length_to_accel = 64;
    }
    if (cache & CACHE_AVX2) {
        fn = buffer_zero_avx2;
        length_to_accel = 128;
    }
#endif
#ifdef CONFIG_AVX512F_OPT
    if (cache & CACHE_AVX512F) {
        fn = buffer_zero_avx512;
        length_to_accel = 256;
    }
#endif
    buffer_accel = fn;
}
static unsigned __attribute__((noinline))
select_accel_cpuinfo(unsigned info)
{
    /* Array is sorted in order of algorithm preference. */
    static const struct {
        unsigned bit;
        unsigned len;
        bool (*fn)(const void *, size_t);
    } all[] = {
#ifdef CONFIG_AVX512F_OPT
        { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
#endif
#ifdef CONFIG_AVX2_OPT
        { CPUINFO_AVX2, 128, buffer_zero_avx2 },
        { CPUINFO_SSE4, 64, buffer_zero_sse4 },
#endif
        { CPUINFO_SSE2, 64, buffer_zero_sse2 },
        { CPUINFO_ALWAYS, 0, buffer_zero_int },
    };
    for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
        if (info & all[i].bit) {
            length_to_accel = all[i].len;
            buffer_accel = all[i].fn;
            return all[i].bit;
        }
    }
    return 0;
}
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
#include "qemu/cpuid.h"
static void __attribute__((constructor)) init_cpuid_cache(void)
{
    unsigned max = __get_cpuid_max(0, NULL);
int a, b, c, d;
unsigned cache = 0;
if (max >= 1) {
__cpuid(1, a, b, c, d);
if (d & bit_SSE2) {
cache |= CACHE_SSE2;
}
if (c & bit_SSE4_1) {
cache |= CACHE_SSE4;
}
/* We must check that AVX is not just available, but usable. */
if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
unsigned bv = xgetbv_low(0);
__cpuid_count(7, 0, a, b, c, d);
if ((bv & 0x6) == 0x6 && (b & bit_AVX2)) {
cache |= CACHE_AVX2;
}
/* 0xe6:
* XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
* and ZMM16-ZMM31 state are enabled by OS)
* XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
*/
if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
cache |= CACHE_AVX512F;
}
}
}
cpuid_cache = cache;
init_accel(cache);
}
static void __attribute__((constructor)) init_accel(void)
{
    used_accel = select_accel_cpuinfo(cpuinfo_init());
}
#endif /* CONFIG_AVX2_OPT */
bool test_buffer_is_zero_next_accel(void)
{
    /* If no bits set, we just tested buffer_zero_int, and there
       are no more acceleration options to test. */
    if (cpuid_cache == 0) {
        return false;
    }
    /* Disable the accelerator we used before and select a new one. */
    cpuid_cache &= cpuid_cache - 1;
    init_accel(cpuid_cache);
    return true;
}
bool test_buffer_is_zero_next_accel(void)
{
    /*
     * Accumulate the accelerators that we've already tested, and
     * remove them from the set to test this round.  We'll get back
     * a zero from select_accel_cpuinfo when there are no more.
     */
    unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
    used_accel |= used;
    return used;
}
static bool select_accel_fn(const void *buf, size_t len)
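Each call to test_buffer_is_zero_next_accel() above marks the current accelerator as used and re-arms the best remaining one, returning zero once only the portable fallback is left. A sketch of how a test can use this to exercise every compiled-in implementation (buffer_is_zero() and test_buffer_is_zero_next_accel() are the public entry points from qemu/cutils.h; the wrapper itself is illustrative):

#include "qemu/osdep.h"
#include "qemu/cutils.h"

/* Run one check against each accelerator the host supports, in turn. */
static void check_with_all_accels(const void *buf, size_t len, bool expect)
{
    do {
        g_assert(buffer_is_zero(buf, len) == expect);
    } while (test_buffer_is_zero_next_accel());
}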

util/cpuinfo-aarch64.c (new file)

@ -0,0 +1,67 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
 * Host specific cpu identification for AArch64.
*/
#include "qemu/osdep.h"
#include "host/cpuinfo.h"
#ifdef CONFIG_LINUX
# ifdef CONFIG_GETAUXVAL
# include <sys/auxv.h>
# else
# include <asm/hwcap.h>
# include "elf.h"
# endif
#endif
#ifdef CONFIG_DARWIN
# include <sys/sysctl.h>
#endif
unsigned cpuinfo;
#ifdef CONFIG_DARWIN
static bool sysctl_for_bool(const char *name)
{
int val = 0;
size_t len = sizeof(val);
if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
return val != 0;
}
/*
* We might in the future ask for properties not present in older kernels,
* but we're only asking about static properties, all of which should be
 * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
* more exotic errors.
*/
assert(errno == ENOENT);
return false;
}
#endif
/* Called both as constructor and (possibly) via other constructors. */
unsigned __attribute__((constructor)) cpuinfo_init(void)
{
unsigned info = cpuinfo;
if (info) {
return info;
}
info = CPUINFO_ALWAYS;
#ifdef CONFIG_LINUX
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
#endif
#ifdef CONFIG_DARWIN
info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
#endif
cpuinfo = info;
return info;
}
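Because cpuinfo_init() is idempotent and returns the populated bitmask, other early-running code can call it directly rather than depend on constructor ordering. A minimal sketch of such a consumer (the variable name and feature choice are illustrative):

#include "qemu/osdep.h"
#include "host/cpuinfo.h"

static bool have_lse2_fast_path;

static void __attribute__((constructor)) init_fast_path(void)
{
    /* Safe even if this runs before cpuinfo-aarch64.c's own constructor. */
    have_lse2_fast_path = (cpuinfo_init() & CPUINFO_LSE2) != 0;
}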

util/cpuinfo-i386.c (new file)

@ -0,0 +1,99 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
 * Host specific cpu identification for x86.
*/
#include "qemu/osdep.h"
#include "host/cpuinfo.h"
#ifdef CONFIG_CPUID_H
# include "qemu/cpuid.h"
#endif
unsigned cpuinfo;
/* Called both as constructor and (possibly) via other constructors. */
unsigned __attribute__((constructor)) cpuinfo_init(void)
{
unsigned info = cpuinfo;
if (info) {
return info;
}
#ifdef CONFIG_CPUID_H
unsigned max, a, b, c, d, b7 = 0, c7 = 0;
max = __get_cpuid_max(0, 0);
if (max >= 7) {
__cpuid_count(7, 0, a, b7, c7, d);
info |= (b7 & bit_BMI ? CPUINFO_BMI1 : 0);
info |= (b7 & bit_BMI2 ? CPUINFO_BMI2 : 0);
}
if (max >= 1) {
__cpuid(1, a, b, c, d);
info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
/* For AVX features, we must check available and usable. */
if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
unsigned bv = xgetbv_low(0);
if ((bv & 6) == 6) {
info |= CPUINFO_AVX1;
info |= (b7 & bit_AVX2 ? CPUINFO_AVX2 : 0);
if ((bv & 0xe0) == 0xe0) {
info |= (b7 & bit_AVX512F ? CPUINFO_AVX512F : 0);
info |= (b7 & bit_AVX512VL ? CPUINFO_AVX512VL : 0);
info |= (b7 & bit_AVX512BW ? CPUINFO_AVX512BW : 0);
info |= (b7 & bit_AVX512DQ ? CPUINFO_AVX512DQ : 0);
info |= (c7 & bit_AVX512VBMI2 ? CPUINFO_AVX512VBMI2 : 0);
}
/*
* The Intel SDM has added:
* Processors that enumerate support for Intel® AVX
* (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
* guarantee that the 16-byte memory operations performed
* by the following instructions will always be carried
* out atomically:
* - MOVAPD, MOVAPS, and MOVDQA.
* - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
* - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
* with EVEX.128 and k0 (masking disabled).
* Note that these instructions require the linear addresses
* of their memory operands to be 16-byte aligned.
*
* AMD has provided an even stronger guarantee that processors
* with AVX provide 16-byte atomicity for all cachable,
* naturally aligned single loads and stores, e.g. MOVDQU.
*
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
*/
__cpuid(0, a, b, c, d);
if (c == signature_INTEL_ecx) {
info |= CPUINFO_ATOMIC_VMOVDQA;
} else if (c == signature_AMD_ecx) {
info |= CPUINFO_ATOMIC_VMOVDQA | CPUINFO_ATOMIC_VMOVDQU;
}
}
}
}
max = __get_cpuid_max(0x8000000, 0);
if (max >= 1) {
__cpuid(0x80000001, a, b, c, d);
info |= (c & bit_LZCNT ? CPUINFO_LZCNT : 0);
}
#endif
info |= CPUINFO_ALWAYS;
cpuinfo = info;
return info;
}
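The two atomicity bits preserve the vendor distinction spelled out in the comment above: CPUINFO_ATOMIC_VMOVDQA covers the aligned 16-byte forms on both Intel and AMD, while CPUINFO_ATOMIC_VMOVDQU additionally reflects AMD's guarantee for any cacheable, naturally aligned access. A sketch of how a consumer might key off them (the helper names are illustrative):

#include "qemu/osdep.h"
#include "host/cpuinfo.h"

/* Both vendors: aligned 16-byte forms (MOVDQA/VMOVDQA) are atomic. */
static bool atomic16_vmovdqa_ok(void)
{
    return cpuinfo_init() & CPUINFO_ATOMIC_VMOVDQA;
}

/* AMD only: any cacheable, naturally aligned 16-byte access (e.g. MOVDQU). */
static bool atomic16_vmovdqu_ok(void)
{
    return cpuinfo_init() & CPUINFO_ATOMIC_VMOVDQU;
}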

util/meson.build

@ -108,3 +108,9 @@ if have_block
endif
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
endif
if cpu == 'aarch64'
util_ss.add(files('cpuinfo-aarch64.c'))
elif cpu in ['x86', 'x86_64']
util_ss.add(files('cpuinfo-i386.c'))
endif