From 1966855e567686e6547b058d8664aefd0e969a0f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 1 Aug 2023 07:55:38 -0700 Subject: [PATCH 01/16] accel/tcg: Adjust parameters and locking with do_{ld,st}_mmio_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace MMULookupPageData* with CPUTLBEntryFull, addr, size. Move QEMU_IOTHREAD_LOCK_GUARD to the caller. This simplifies the usage from do_ld16_beN and do_st16_leN, where we weren't locking the entire operation, and required hoop jumping for passing addr and size. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 67 +++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index ba44501a7c..23386ecfde 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2066,24 +2066,22 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, /** * do_ld_mmio_beN: * @env: cpu context - * @p: translation parameters + * @full: page parameters * @ret_be: accumulated data + * @addr: virtual address + * @size: number of bytes * @mmu_idx: virtual address context * @ra: return address into tcg generated code, or 0 + * Context: iothread lock held * - * Load @p->size bytes from @p->addr, which is memory-mapped i/o. + * Load @size bytes from @addr, which is memory-mapped i/o. * The bytes are concatenated in big-endian order with @ret_be. */ -static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p, - uint64_t ret_be, int mmu_idx, - MMUAccessType type, uintptr_t ra) +static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full, + uint64_t ret_be, vaddr addr, int size, + int mmu_idx, MMUAccessType type, uintptr_t ra) { - CPUTLBEntryFull *full = p->full; - vaddr addr = p->addr; - int i, size = p->size; - - QEMU_IOTHREAD_LOCK_GUARD(); - for (i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB); ret_be = (ret_be << 8) | x; } @@ -2232,7 +2230,9 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p, unsigned tmp, half_size; if (unlikely(p->flags & TLB_MMIO)) { - return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size, + mmu_idx, type, ra); } /* @@ -2281,11 +2281,11 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p, MemOp atom; if (unlikely(p->flags & TLB_MMIO)) { - p->size = size - 8; - a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra); - p->addr += p->size; - p->size = 8; - b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8, + mmu_idx, MMU_DATA_LOAD, ra); + b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8, + mmu_idx, MMU_DATA_LOAD, ra); return int128_make128(b, a); } @@ -2664,24 +2664,23 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, /** * do_st_mmio_leN: * @env: cpu context - * @p: translation parameters + * @full: page parameters * @val_le: data to store + * @addr: virtual address + * @size: number of bytes * @mmu_idx: virtual address context * @ra: return address into tcg generated code, or 0 + * Context: iothread lock held * - * Store @p->size bytes at @p->addr, which is memory-mapped i/o. + * Store @size bytes at @addr, which is memory-mapped i/o. 
* The bytes to store are extracted in little-endian order from @val_le; * return the bytes of @val_le beyond @p->size that have not been stored. */ -static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p, - uint64_t val_le, int mmu_idx, uintptr_t ra) +static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full, + uint64_t val_le, vaddr addr, int size, + int mmu_idx, uintptr_t ra) { - CPUTLBEntryFull *full = p->full; - vaddr addr = p->addr; - int i, size = p->size; - - QEMU_IOTHREAD_LOCK_GUARD(); - for (i = 0; i < size; i++, val_le >>= 8) { + for (int i = 0; i < size; i++, val_le >>= 8) { io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB); } return val_le; @@ -2698,7 +2697,9 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p, unsigned tmp, half_size; if (unlikely(p->flags & TLB_MMIO)) { - return do_st_mmio_leN(env, p, val_le, mmu_idx, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + return do_st_mmio_leN(env, p->full, val_le, p->addr, + p->size, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { return val_le >> (p->size * 8); } @@ -2751,11 +2752,11 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p, MemOp atom; if (unlikely(p->flags & TLB_MMIO)) { - p->size = 8; - do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra); - p->size = size - 8; - p->addr += 8; - return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, int128_getlo(val_le), + p->addr, 8, mmu_idx, ra); + return do_st_mmio_leN(env, p->full, int128_gethi(val_le), + p->addr + 8, size - 8, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { return int128_gethi(val_le) >> ((size - 8) * 8); } From 190aba803fbcac30b29bdab25f454b1284a96201 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 1 Aug 2023 10:10:45 -0700 Subject: [PATCH 02/16] accel/tcg: Issue wider aligned i/o in do_{ld,st}_mmio_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the address and size are aligned, send larger chunks to the memory subsystem. This will be required to make more use of these helpers. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 76 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 23386ecfde..a308cb7534 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2081,10 +2081,40 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full, uint64_t ret_be, vaddr addr, int size, int mmu_idx, MMUAccessType type, uintptr_t ra) { - for (int i = 0; i < size; i++) { - uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB); - ret_be = (ret_be << 8) | x; - } + uint64_t t; + + tcg_debug_assert(size > 0 && size <= 8); + do { + /* Read aligned pieces up to 8 bytes. 
*/ + switch ((size | (int)addr) & 7) { + case 1: + case 3: + case 5: + case 7: + t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB); + ret_be = (ret_be << 8) | t; + size -= 1; + addr += 1; + break; + case 2: + case 6: + t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW); + ret_be = (ret_be << 16) | t; + size -= 2; + addr += 2; + break; + case 4: + t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL); + ret_be = (ret_be << 32) | t; + size -= 4; + addr += 4; + break; + case 0: + return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ); + default: + qemu_build_not_reached(); + } + } while (size); return ret_be; } @@ -2680,9 +2710,41 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full, uint64_t val_le, vaddr addr, int size, int mmu_idx, uintptr_t ra) { - for (int i = 0; i < size; i++, val_le >>= 8) { - io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB); - } + tcg_debug_assert(size > 0 && size <= 8); + + do { + /* Store aligned pieces up to 8 bytes. */ + switch ((size | (int)addr) & 7) { + case 1: + case 3: + case 5: + case 7: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB); + val_le >>= 8; + size -= 1; + addr += 1; + break; + case 2: + case 6: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW); + val_le >>= 16; + size -= 2; + addr += 2; + break; + case 4: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL); + val_le >>= 32; + size -= 4; + addr += 4; + break; + case 0: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ); + return 0; + default: + qemu_build_not_reached(); + } + } while (size); + return val_le; } From f7eaf9d702efdd02481d5f1c25f7d8e0ffb64c6e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 1 Aug 2023 10:46:03 -0700 Subject: [PATCH 03/16] accel/tcg: Do not issue misaligned i/o MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the single-page case we were issuing misaligned i/o to the memory subsystem, which does not handle it properly. Split such accesses via do_{ld,st}_mmio_*. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1800 Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 118 +++++++++++++++++++++++++++------------------ 1 file changed, 72 insertions(+), 46 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index a308cb7534..4b1bfaa53d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2370,16 +2370,20 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx, static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx, MMUAccessType type, MemOp memop, uintptr_t ra) { - uint64_t ret; + uint16_t ret; if (unlikely(p->flags & TLB_MMIO)) { - return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); - } - - /* Perform the load host endian, then swap if necessary. */ - ret = load_atom_2(env, ra, p->haddr, memop); - if (memop & MO_BSWAP) { - ret = bswap16(ret); + QEMU_IOTHREAD_LOCK_GUARD(); + ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra); + if ((memop & MO_BSWAP) == MO_LE) { + ret = bswap16(ret); + } + } else { + /* Perform the load host endian, then swap if necessary. 
*/ + ret = load_atom_2(env, ra, p->haddr, memop); + if (memop & MO_BSWAP) { + ret = bswap16(ret); + } } return ret; } @@ -2390,13 +2394,17 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx, uint32_t ret; if (unlikely(p->flags & TLB_MMIO)) { - return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); - } - - /* Perform the load host endian. */ - ret = load_atom_4(env, ra, p->haddr, memop); - if (memop & MO_BSWAP) { - ret = bswap32(ret); + QEMU_IOTHREAD_LOCK_GUARD(); + ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra); + if ((memop & MO_BSWAP) == MO_LE) { + ret = bswap32(ret); + } + } else { + /* Perform the load host endian. */ + ret = load_atom_4(env, ra, p->haddr, memop); + if (memop & MO_BSWAP) { + ret = bswap32(ret); + } } return ret; } @@ -2407,13 +2415,17 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx, uint64_t ret; if (unlikely(p->flags & TLB_MMIO)) { - return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); - } - - /* Perform the load host endian. */ - ret = load_atom_8(env, ra, p->haddr, memop); - if (memop & MO_BSWAP) { - ret = bswap64(ret); + QEMU_IOTHREAD_LOCK_GUARD(); + ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra); + if ((memop & MO_BSWAP) == MO_LE) { + ret = bswap64(ret); + } + } else { + /* Perform the load host endian. */ + ret = load_atom_8(env, ra, p->haddr, memop); + if (memop & MO_BSWAP) { + ret = bswap64(ret); + } } return ret; } @@ -2561,20 +2573,22 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr, cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l); if (likely(!crosspage)) { - /* Perform the load host endian. */ if (unlikely(l.page[0].flags & TLB_MMIO)) { QEMU_IOTHREAD_LOCK_GUARD(); - a = io_readx(env, l.page[0].full, l.mmu_idx, addr, - ra, MMU_DATA_LOAD, MO_64); - b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8, - ra, MMU_DATA_LOAD, MO_64); - ret = int128_make128(HOST_BIG_ENDIAN ? b : a, - HOST_BIG_ENDIAN ? a : b); + a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8, + l.mmu_idx, MMU_DATA_LOAD, ra); + b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8, + l.mmu_idx, MMU_DATA_LOAD, ra); + ret = int128_make128(b, a); + if ((l.memop & MO_BSWAP) == MO_LE) { + ret = bswap128(ret); + } } else { + /* Perform the load host endian. 
*/ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop); - } - if (l.memop & MO_BSWAP) { - ret = bswap128(ret); + if (l.memop & MO_BSWAP) { + ret = bswap128(ret); + } } return ret; } @@ -2874,7 +2888,11 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val, int mmu_idx, MemOp memop, uintptr_t ra) { if (unlikely(p->flags & TLB_MMIO)) { - io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + if ((memop & MO_BSWAP) != MO_LE) { + val = bswap16(val); + } + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { @@ -2890,7 +2908,11 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val, int mmu_idx, MemOp memop, uintptr_t ra) { if (unlikely(p->flags & TLB_MMIO)) { - io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + if ((memop & MO_BSWAP) != MO_LE) { + val = bswap32(val); + } + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { @@ -2906,7 +2928,11 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val, int mmu_idx, MemOp memop, uintptr_t ra) { if (unlikely(p->flags & TLB_MMIO)) { - io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + if ((memop & MO_BSWAP) != MO_LE) { + val = bswap64(val); + } + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { @@ -3029,22 +3055,22 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { - /* Swap to host endian if necessary, then store. */ - if (l.memop & MO_BSWAP) { - val = bswap128(val); - } if (unlikely(l.page[0].flags & TLB_MMIO)) { - QEMU_IOTHREAD_LOCK_GUARD(); - if (HOST_BIG_ENDIAN) { - b = int128_getlo(val), a = int128_gethi(val); - } else { - a = int128_getlo(val), b = int128_gethi(val); + if ((l.memop & MO_BSWAP) != MO_LE) { + val = bswap128(val); } - io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64); - io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64); + a = int128_getlo(val); + b = int128_gethi(val); + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra); + do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra); } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { + /* Swap to host endian if necessary, then store. */ + if (l.memop & MO_BSWAP) { + val = bswap128(val); + } store_atom_16(env, ra, l.page[0].haddr, l.memop, val); } return; From c30d0b861cea8539ee0acb55a1a949ed4b5ec82a Mon Sep 17 00:00:00 2001 From: Mikhail Tyutin Date: Fri, 4 Aug 2023 14:09:03 +0300 Subject: [PATCH 04/16] accel/tcg: Call save_iotlb_data from io_readx as well Apply save_iotlb_data() to io_readx() as well as to io_writex(). This fixes SEGFAULT on qemu_plugin_hwaddr_phys_addr() call plugins for addresses inside of MMIO region. 
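For context, a minimal sketch of the plugin-side path that used to fault on MMIO accesses. This is illustrative only and not part of the patch; the API names are assumed from <qemu-plugin.h> and the plugin is assumed to be built like the samples in contrib/plugins:

    #include <qemu-plugin.h>
    #include <inttypes.h>
    #include <stdio.h>

    QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

    /*
     * Memory callback: resolving the physical address of an i/o access
     * goes through the SavedIOTLB that io_readx()/io_writex() record.
     */
    static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
                         uint64_t vaddr, void *udata)
    {
        struct qemu_plugin_hwaddr *hw = qemu_plugin_get_hwaddr(info, vaddr);

        if (hw && qemu_plugin_hwaddr_is_io(hw)) {
            char msg[64];
            snprintf(msg, sizeof(msg), "io pa=0x%" PRIx64 "\n",
                     qemu_plugin_hwaddr_phys_addr(hw));
            qemu_plugin_outs(msg);
        }
    }

    static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
    {
        for (size_t i = 0; i < qemu_plugin_tb_n_insns(tb); i++) {
            qemu_plugin_register_vcpu_mem_cb(qemu_plugin_tb_get_insn(tb, i),
                                             vcpu_mem, QEMU_PLUGIN_CB_NO_REGS,
                                             QEMU_PLUGIN_MEM_RW, NULL);
        }
    }

    QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                               const qemu_info_t *info,
                                               int argc, char **argv)
    {
        qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
        return 0;
    }
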
Signed-off-by: Dmitriy Solovev Signed-off-by: Mikhail Tyutin Reviewed-by: Richard Henderson Message-Id: <20230804110903.19968-1-m.tyutin@yadro.com> Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4b1bfaa53d..d68fa6867c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1363,6 +1363,21 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, } } +/* + * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin. + * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match + * because of the side effect of io_writex changing memory layout. + */ +static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section, + hwaddr mr_offset) +{ +#ifdef CONFIG_PLUGIN + SavedIOTLB *saved = &cs->saved_iotlb; + saved->section = section; + saved->mr_offset = mr_offset; +#endif +} + static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, int mmu_idx, vaddr addr, uintptr_t retaddr, MMUAccessType access_type, MemOp op) @@ -1382,6 +1397,12 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, cpu_io_recompile(cpu, retaddr); } + /* + * The memory_region_dispatch may trigger a flush/resize + * so for plugins we save the iotlb_data just in case. + */ + save_iotlb_data(cpu, section, mr_offset); + { QEMU_IOTHREAD_LOCK_GUARD(); r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); @@ -1398,21 +1419,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, return val; } -/* - * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin. - * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match - * because of the side effect of io_writex changing memory layout. - */ -static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section, - hwaddr mr_offset) -{ -#ifdef CONFIG_PLUGIN - SavedIOTLB *saved = &cs->saved_iotlb; - saved->section = section; - saved->mr_offset = mr_offset; -#endif -} - static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, int mmu_idx, uint64_t val, vaddr addr, uintptr_t retaddr, MemOp op) From 6c78de6eb6f986b2e06e95fabad62731a44aaafd Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Tue, 1 Aug 2023 12:37:11 -0300 Subject: [PATCH 05/16] gdbstub: use 0 ("any process") on packets with no PID Previously, qemu-user would always report PID 1 to GDB. This was changed at dc14a7a6e9 (gdbstub: Report the actual qemu-user pid, 2023-06-30), but read_thread_id() still considers GDB packets with "no PID" as "PID 1", which is not the qemu-user PID. Fix that by parsing "no PID" as "0", which the GDB Remote Protocol defines as "any process". Note that this should have no effect for system emulation as, in this case, gdb_create_default_process() will assign PID 1 for the first process and that is what the gdbstub uses for GDB requests with no PID, or PID 0. This issue was found with hexagon-lldb, which sends a "Hg" packet with only the thread-id, but no process-id, leading to the invalid usage of "PID 1" by qemu-hexagon and a subsequent "E22" reply. 
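For illustration (thread-id syntax assumed from the GDB Remote Serial Protocol, not taken from this patch): a multiprocess-aware debugger selects a thread with e.g. "Hgp2.5" (process 2, thread 5), while hexagon-lldb sends just "Hg5". With no "p<pid>." prefix, read_thread_id() used to fill in PID 1; now that qemu-user reports its real PID, 1 matches no attached process and the stub answers "E22". Parsing the missing PID as 0 ("any process") lets the stub fall back to the current process instead.
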
Signed-off-by: Matheus Tavares Bernardino Acked-by: Ilya Leoshkevich Message-Id: <78a3b06f6ab90a7ff8e73ae14a996eb27ec76c85.1690904195.git.quic_mathbern@quicinc.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- gdbstub/gdbstub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index ce8b42eb15..e74ecc78cc 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -537,7 +537,7 @@ static GDBThreadIdKind read_thread_id(const char *buf, const char **end_buf, /* Skip '.' */ buf++; } else { - p = 1; + p = 0; } ret = qemu_strtoul(buf, &buf, 16, &t); From 89e5b7935e92af6f3b4e4cdb19dfddef4e9dde36 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 5 Aug 2023 17:38:06 +0000 Subject: [PATCH 06/16] configure: Fix linux-user host detection for riscv64 Mirror the host_arch variable from meson.build, so that we probe for the correct linux-user/include/host/ directory. Fixes: e3e477c3bca0 ("configure: Fix cross-building for RISCV host") Signed-off-by: Richard Henderson --- configure | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/configure b/configure index afb25fd558..98dc78280e 100755 --- a/configure +++ b/configure @@ -469,6 +469,13 @@ else echo "WARNING: unrecognized host CPU, proceeding with 'uname -m' output '$cpu'" fi +case "$cpu" in + riscv*) + host_arch=riscv ;; + *) + host_arch="$cpu" ;; +esac + # Normalise host CPU name and set multilib cflags. The canonicalization # isn't really necessary, because the architectures that we check for # should not hit the 'uname -m' case, but better safe than sorry. @@ -803,7 +810,7 @@ default_target_list="" mak_wilds="" if [ "$linux_user" != no ]; then - if [ "$targetos" = linux ] && [ -d "$source_path/linux-user/include/host/$cpu" ]; then + if [ "$targetos" = linux ] && [ -d "$source_path/linux-user/include/host/$host_arch" ]; then linux_user=yes elif [ "$linux_user" = yes ]; then error_exit "linux-user not supported on this architecture" From 4333f0924c2f2ca8efaebaed8c24f55f77d8b013 Mon Sep 17 00:00:00 2001 From: Nathan Egge Date: Thu, 3 Aug 2023 09:14:24 -0400 Subject: [PATCH 07/16] linux-user/elfload: Set V in ELF_HWCAP for RISC-V Set V bit for hwcap if misa is set. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1793 Signed-off-by: Nathan Egge Reviewed-by: Daniel Henrique Barboza Tested-by: Daniel Henrique Barboza Message-Id: <20230803131424.40744-1-negge@xiph.org> Signed-off-by: Richard Henderson --- linux-user/elfload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 861ec07abc..a299ba7300 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1710,7 +1710,8 @@ static uint32_t get_elf_hwcap(void) #define MISA_BIT(EXT) (1 << (EXT - 'A')) RISCVCPU *cpu = RISCV_CPU(thread_cpu); uint32_t mask = MISA_BIT('I') | MISA_BIT('M') | MISA_BIT('A') - | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C'); + | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C') + | MISA_BIT('V'); return cpu->env.misa_ext & mask; #undef MISA_BIT From c3dd50da0f4d00fffe8ea5e211c2c189fe6ad4fb Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:47 +0900 Subject: [PATCH 08/16] linux-user: Unset MAP_FIXED_NOREPLACE for host Passing MAP_FIXED_NOREPLACE to host will fail for reserved_va because the address space is reserved with mmap. Replace it with MAP_FIXED in that case. 
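To see why the substitution is needed, here is a small standalone sketch (not QEMU code; it assumes a Linux 4.17+ kernel and a libc that exposes MAP_FIXED_NOREPLACE) that mimics in miniature what reserved_va does to the address space:

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        size_t sz = 16 * 4096;

        /* Stand-in for the reserved_va reservation of the guest space. */
        void *base = mmap(NULL, sz, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        if (base == MAP_FAILED) {
            return 1;
        }

        /*
         * A guest mapping forwarded to the host with MAP_FIXED_NOREPLACE
         * collides with our own reservation: prints "File exists" (EEXIST).
         */
        void *p = mmap(base, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                       -1, 0);
        printf("NOREPLACE: %s\n", p == MAP_FAILED ? strerror(errno) : "mapped");

        /* ...while MAP_FIXED replaces the reservation in place. */
        p = mmap(base, 4096, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
        printf("FIXED:     %s\n", p == MAP_FAILED ? strerror(errno) : "mapped");
        return 0;
    }

The guest-side page_check_range_empty() check already guarantees the range is free from the guest's point of view, so dropping the flag only in the reserved_va case keeps the MAP_FIXED_NOREPLACE protection where it still matters.
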
Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-2-akihiko.odaki@daynix.com> [rth: Expand inline commentary.] Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/mmap.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index a5dfb56545..a11c630a7b 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -603,11 +603,26 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, goto fail; } - /* Validate that the chosen range is empty. */ - if ((flags & MAP_FIXED_NOREPLACE) - && !page_check_range_empty(start, last)) { - errno = EEXIST; - goto fail; + if (flags & MAP_FIXED_NOREPLACE) { + /* Validate that the chosen range is empty. */ + if (!page_check_range_empty(start, last)) { + errno = EEXIST; + goto fail; + } + + /* + * With reserved_va, the entire address space is mmaped in the + * host to ensure it isn't accidentally used for something else. + * We have just checked that the guest address is not mapped + * within the guest, but need to replace the host reservation. + * + * Without reserved_va, despite the guest address check above, + * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite + * any host address mappings. + */ + if (reserved_va) { + flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; + } } /* From ddcdd8c48fc48b2d528756fc98f1ce0ec3d7b617 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:48 +0900 Subject: [PATCH 09/16] linux-user: Fix MAP_FIXED_NOREPLACE on old kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The man page states: > Note that older kernels which do not recognize the MAP_FIXED_NOREPLACE > flag will typically (upon detecting a collision with a preexisting > mapping) fall back to a “non-MAP_FIXED” type of behavior: they will > return an address that is different from the requested address. > Therefore, backward-compatible software should check the returned > address against the requested address. 
https://man7.org/linux/man-pages/man2/mmap.2.html Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-3-akihiko.odaki@daynix.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/mmap.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index a11c630a7b..90b3ef2140 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -263,7 +263,11 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, void *p = mmap(host_start, qemu_host_page_size, target_to_host_prot(prot), flags | MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) { + if (p != host_start) { + if (p != MAP_FAILED) { + munmap(p, qemu_host_page_size); + errno = EEXIST; + } return false; } prot_old = prot; @@ -687,17 +691,25 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, /* map the middle (easier) */ if (real_start < real_last) { - void *p; + void *p, *want_p; off_t offset1; + size_t len1; if (flags & MAP_ANONYMOUS) { offset1 = 0; } else { offset1 = offset + real_start - start; } - p = mmap(g2h_untagged(real_start), real_last - real_start + 1, - target_to_host_prot(target_prot), flags, fd, offset1); - if (p == MAP_FAILED) { + len1 = real_last - real_start + 1; + want_p = g2h_untagged(real_start); + + p = mmap(want_p, len1, target_to_host_prot(target_prot), + flags, fd, offset1); + if (p != want_p) { + if (p != MAP_FAILED) { + munmap(p, len1); + errno = EEXIST; + } goto fail; } passthrough_start = real_start; From c6cc059eca18d9f6e4e26bb8b6d1135ddb35d81a Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:49 +0900 Subject: [PATCH 10/16] linux-user: Do not call get_errno() in do_brk() Later the returned value is compared with -1, and negated errno is not expected. Fixes: 00faf08c95 ("linux-user: Don't use MAP_FIXED in do_brk()") Reviewed-by: Helge Deller Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-4-akihiko.odaki@daynix.com> Signed-off-by: Richard Henderson --- linux-user/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 95727a816a..b08276bdf8 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -862,9 +862,9 @@ abi_long do_brk(abi_ulong brk_val) */ if (new_host_brk_page > brk_page) { new_alloc_size = new_host_brk_page - brk_page; - mapped_addr = get_errno(target_mmap(brk_page, new_alloc_size, - PROT_READ|PROT_WRITE, - MAP_ANON|MAP_PRIVATE, 0, 0)); + mapped_addr = target_mmap(brk_page, new_alloc_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); } else { new_alloc_size = 0; mapped_addr = brk_page; From e69e032d1a8ee8d754ca119009a3c2c997f8bb30 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:50 +0900 Subject: [PATCH 11/16] linux-user: Use MAP_FIXED_NOREPLACE for do_brk() MAP_FIXED_NOREPLACE can ensure the mapped address is fixed without concerning that the new mapping overwrites something else. 
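In effect, combined with the MAP_FIXED_NOREPLACE fallback fix earlier in this series, the old dance of mapping without MAP_FIXED, checking whether the kernel happened to pick brk_page, and unmapping again on a mismatch becomes unnecessary: the request either lands exactly at the requested address or fails outright.
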
Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-5-akihiko.odaki@daynix.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/syscall.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b08276bdf8..f64024273f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -854,17 +854,12 @@ abi_long do_brk(abi_ulong brk_val) return target_brk; } - /* We need to allocate more memory after the brk... Note that - * we don't use MAP_FIXED because that will map over the top of - * any existing mapping (like the one with the host libc or qemu - * itself); instead we treat "mapped but at wrong address" as - * a failure and unmap again. - */ if (new_host_brk_page > brk_page) { new_alloc_size = new_host_brk_page - brk_page; mapped_addr = target_mmap(brk_page, new_alloc_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE, + -1, 0); } else { new_alloc_size = 0; mapped_addr = brk_page; @@ -883,12 +878,6 @@ abi_long do_brk(abi_ulong brk_val) target_brk = brk_val; brk_page = new_host_brk_page; return target_brk; - } else if (mapped_addr != -1) { - /* Mapped but at wrong address, meaning there wasn't actually - * enough space for this brk. - */ - target_munmap(mapped_addr, new_alloc_size); - mapped_addr = -1; } #if defined(TARGET_ALPHA) From cb9d5d1fda0bc2312fc0c779b4ea1d7bf826f31f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:51 +0900 Subject: [PATCH 12/16] linux-user: Do nothing if too small brk is specified Linux 6.4.7 does nothing when a value smaller than the initial brk is specified. Fixes: 86f04735ac ("linux-user: Fix brk() to release pages") Reviewed-by: Helge Deller Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-6-akihiko.odaki@daynix.com> Signed-off-by: Richard Henderson --- linux-user/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f64024273f..e1436a3962 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -820,14 +820,14 @@ abi_long do_brk(abi_ulong brk_val) /* brk pointers are always untagged */ - /* return old brk value if brk_val unchanged or zero */ - if (!brk_val || brk_val == target_brk) { + /* return old brk value if brk_val unchanged */ + if (brk_val == target_brk) { return target_brk; } /* do not allow to shrink below initial brk value */ if (brk_val < initial_target_brk) { - brk_val = initial_target_brk; + return target_brk; } new_brk = TARGET_PAGE_ALIGN(brk_val); From 2aea137a425a87b930a33590177b04368fd7cc12 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:52 +0900 Subject: [PATCH 13/16] linux-user: Do not align brk with host page size do_brk() minimizes calls into target_mmap() by aligning the address with host page size, which is potentially larger than the target page size. However, the current implementation of this optimization has two bugs: - The start of brk is rounded up with the host page size while brk advertises an address aligned with the target page size as the beginning of brk. This makes the beginning of brk unmapped. - Content clearing after mapping is flawed. The size to clear is specified as HOST_PAGE_ALIGN(brk_page) - brk_page, but brk_page is aligned with the host page size so it is always zero. This optimization actually has no practical benefit. 
It makes difference when brk() is called multiple times with values in a range of the host page size. However, sophisticated memory allocators try to avoid to make such frequent brk() calls. For example, glibc 2.37 calls brk() to shrink the heap only when there is a room more than 128 KiB. It is rare to have a page size larger than 128 KiB if it happens. Let's remove the optimization to fix the bugs and make the code simpler. Fixes: 86f04735ac ("linux-user: Fix brk() to release pages") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1616 Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-7-akihiko.odaki@daynix.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/elfload.c | 4 ++-- linux-user/syscall.c | 54 ++++++++++---------------------------------- 2 files changed, 14 insertions(+), 44 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index a299ba7300..36e4026f05 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3679,8 +3679,8 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) * to mmap pages in this space. */ if (info->reserve_brk) { - abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk); - abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk); + abi_ulong start_brk = TARGET_PAGE_ALIGN(info->brk); + abi_ulong end_brk = TARGET_PAGE_ALIGN(info->brk + info->reserve_brk); target_munmap(start_brk, end_brk - start_brk); } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e1436a3962..7c2c2f6e2f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -802,81 +802,51 @@ static inline int host_to_target_sock_type(int host_type) } static abi_ulong target_brk, initial_target_brk; -static abi_ulong brk_page; void target_set_brk(abi_ulong new_brk) { target_brk = TARGET_PAGE_ALIGN(new_brk); initial_target_brk = target_brk; - brk_page = HOST_PAGE_ALIGN(target_brk); } /* do_brk() must return target values and target errnos. 
*/ abi_long do_brk(abi_ulong brk_val) { abi_long mapped_addr; - abi_ulong new_alloc_size; - abi_ulong new_brk, new_host_brk_page; + abi_ulong new_brk; + abi_ulong old_brk; /* brk pointers are always untagged */ - /* return old brk value if brk_val unchanged */ - if (brk_val == target_brk) { - return target_brk; - } - /* do not allow to shrink below initial brk value */ if (brk_val < initial_target_brk) { return target_brk; } new_brk = TARGET_PAGE_ALIGN(brk_val); - new_host_brk_page = HOST_PAGE_ALIGN(brk_val); + old_brk = TARGET_PAGE_ALIGN(target_brk); - /* brk_val and old target_brk might be on the same page */ - if (new_brk == TARGET_PAGE_ALIGN(target_brk)) { - /* empty remaining bytes in (possibly larger) host page */ - memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk); + /* new and old target_brk might be on the same page */ + if (new_brk == old_brk) { target_brk = brk_val; return target_brk; } /* Release heap if necesary */ - if (new_brk < target_brk) { - /* empty remaining bytes in (possibly larger) host page */ - memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk); - - /* free unused host pages and set new brk_page */ - target_munmap(new_host_brk_page, brk_page - new_host_brk_page); - brk_page = new_host_brk_page; + if (new_brk < old_brk) { + target_munmap(new_brk, old_brk - new_brk); target_brk = brk_val; return target_brk; } - if (new_host_brk_page > brk_page) { - new_alloc_size = new_host_brk_page - brk_page; - mapped_addr = target_mmap(brk_page, new_alloc_size, - PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE, - -1, 0); - } else { - new_alloc_size = 0; - mapped_addr = brk_page; - } - - if (mapped_addr == brk_page) { - /* Heap contents are initialized to zero, as for anonymous - * mapped pages. Technically the new pages are already - * initialized to zero since they *are* anonymous mapped - * pages, however we have to take care with the contents that - * come from the remaining part of the previous page: it may - * contains garbage data due to a previous heap usage (grown - * then shrunken). */ - memset(g2h_untagged(brk_page), 0, HOST_PAGE_ALIGN(brk_page) - brk_page); + mapped_addr = target_mmap(old_brk, new_brk - old_brk, + PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE, + -1, 0); + if (mapped_addr == old_brk) { target_brk = brk_val; - brk_page = new_host_brk_page; return target_brk; } From 0662a626a712dde5f8a91e1b078644332336e9fa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 2 Aug 2023 16:25:37 -0700 Subject: [PATCH 14/16] linux-user: Properly set image_info.brk in flatload The heap starts at "brk" not "start_brk". With this fixed, image_info.start_brk is unused and may be removed. 
Tested-by: Helge Deller Reviewed-by: Helge Deller Reviewed-by: Akihiko Odaki Signed-off-by: Richard Henderson --- linux-user/flatload.c | 2 +- linux-user/main.c | 2 -- linux-user/qemu.h | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/linux-user/flatload.c b/linux-user/flatload.c index 5efec2630e..8f5e9f489b 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -811,7 +811,7 @@ int load_flt_binary(struct linux_binprm *bprm, struct image_info *info) info->end_code = libinfo[0].start_code + libinfo[0].text_len; info->start_data = libinfo[0].start_data; info->end_data = libinfo[0].end_data; - info->start_brk = libinfo[0].start_brk; + info->brk = libinfo[0].start_brk; info->start_stack = sp; info->stack_limit = libinfo[0].start_brk; info->entry = start_addr; diff --git a/linux-user/main.c b/linux-user/main.c index dba67ffa36..556956c363 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -920,8 +920,6 @@ int main(int argc, char **argv, char **envp) fprintf(f, "page layout changed following binary load\n"); page_dump(f); - fprintf(f, "start_brk 0x" TARGET_ABI_FMT_lx "\n", - info->start_brk); fprintf(f, "end_code 0x" TARGET_ABI_FMT_lx "\n", info->end_code); fprintf(f, "start_code 0x" TARGET_ABI_FMT_lx "\n", diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 802794db63..2046a23037 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -29,7 +29,6 @@ struct image_info { abi_ulong end_code; abi_ulong start_data; abi_ulong end_data; - abi_ulong start_brk; abi_ulong brk; abi_ulong reserve_brk; abi_ulong start_mmap; From 62cbf0815005f0ba91b0c36fbcabd479c6e3a2f2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 2 Aug 2023 14:02:46 -0700 Subject: [PATCH 15/16] linux-user: Remove last_brk This variable is unused. Reviewed-by: Helge Deller Reviewed-by: Akihiko Odaki Signed-off-by: Richard Henderson --- linux-user/mmap.c | 2 -- linux-user/user-mmap.h | 1 - 2 files changed, 3 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 90b3ef2140..eb04fab8ab 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -314,8 +314,6 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, #endif abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; -unsigned long last_brk; - /* * Subroutine of mmap_find_vma, used when we have pre-allocated * a chunk of guest address space. diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h index 3fc986f92f..7265c2c116 100644 --- a/linux-user/user-mmap.h +++ b/linux-user/user-mmap.h @@ -26,7 +26,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_size, unsigned long flags, abi_ulong new_addr); abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice); -extern unsigned long last_brk; extern abi_ulong mmap_next_start; abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong); void mmap_fork_start(void); From 3c4a8a8fdae95611ca48e7c40e56fbf60c3267f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Aug 2023 09:33:58 -0700 Subject: [PATCH 16/16] bsd-user: Remove last_brk This variable is unused. 
Signed-off-by: Richard Henderson --- bsd-user/mmap.c | 2 -- bsd-user/qemu.h | 1 - 2 files changed, 3 deletions(-) diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index b62a69bd07..8e148a2ea3 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -214,8 +214,6 @@ static int mmap_frag(abi_ulong real_start, #endif abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; -unsigned long last_brk; - /* * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest * address space. diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index edf9602f9b..8f2d6a3c78 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -232,7 +232,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_size, unsigned long flags, abi_ulong new_addr); int target_msync(abi_ulong start, abi_ulong len, int flags); -extern unsigned long last_brk; extern abi_ulong mmap_next_start; abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size); void TSA_NO_TSA mmap_fork_start(void);