Merge tag 'pull-lu-20230808' of https://gitlab.com/rth7680/qemu into staging

linux-user: Adjust guest image layout vs reserved_va
linux-user: Do not adjust image mapping for host page size
linux-user: Adjust initial brk when interpreter is close to executable
util/selfmap: Rewrite using qemu/interval-tree.h
linux-user: Rewrite probe_guest_base

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmTSrp4dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9lTQf/W/Tbd6CFnZpVE8Sb
# BPrhdmo+x6Jftt1Ha66b/4xnasX7DuVaI1ECDh4CQQKIOh9A4LETx6ue9/UGi4vT
# Fe4UrrJcAjt/CPaZhwXniJM9CvEnw1gkl3AgKAtZOBEConuPnkTiSWjySmCt3T4r
# EGQxDe0HLpWYavNtvyywak/sEbwOD4hNAunFpJB6PLZ+KEoCDZwtcQdl55kg5bIt
# WBMgUSXnWhC45t+26OcSDeHovqxHoA647H10T0y0U6bNVkW0tRW51xCTvHt+iDyR
# s8UOCe1Q+w8F18fN68HIWBJ6NCzUts/AmQrWwc/MWiK1z91/ht5mlKAuNYnoZ6jH
# htCSEA==
# =ERAI
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 08 Aug 2023 02:07:42 PM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-lu-20230808' of https://gitlab.com/rth7680/qemu:
  linux-user: Rewrite non-fixed probe_guest_base
  linux-user: Rewrite fixed probe_guest_base
  linux-user: Consolidate guest bounds check in probe_guest_base
  linux-user: Remove duplicate CPU_LOG_PAGE from probe_guest_base
  util/selfmap: Rewrite using qemu/interval-tree.h
  linux-user: Use zero_bss for PT_LOAD with no file contents too
  linux-user: Do not adjust zero_bss for host page size
  linux-user: Do not adjust image mapping for host page size
  linux-user: Adjust initial brk when interpreter is close to executable
  linux-user: Use elf_et_dyn_base for ET_DYN with interpreter
  linux-user: Use MAP_FIXED_NOREPLACE for initial image mmap
  linux-user: Define ELF_ET_DYN_BASE in $guest/target_mman.h
  linux-user: Define TASK_UNMAPPED_BASE in $guest/target_mman.h
  linux-user: Adjust task_unmapped_base for reserved_va

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Richard Henderson 2023-08-08 14:10:06 -07:00
commit 32e07fddc6
28 changed files with 803 additions and 478 deletions

View File

@ -9,9 +9,10 @@
#ifndef SELFMAP_H
#define SELFMAP_H
#include "qemu/interval-tree.h"
typedef struct {
unsigned long start;
unsigned long end;
IntervalTreeNode itree;
/* flags */
bool is_read;
@ -19,26 +20,25 @@ typedef struct {
bool is_exec;
bool is_priv;
unsigned long offset;
gchar *dev;
uint64_t offset;
uint64_t inode;
gchar *path;
const char *path;
char dev[];
} MapInfo;
/**
* read_self_maps:
*
* Read /proc/self/maps and return a list of MapInfo structures.
* Read /proc/self/maps and return a tree of MapInfo structures.
*/
GSList *read_self_maps(void);
IntervalTreeRoot *read_self_maps(void);
/**
* free_self_maps:
* @info: a GSlist
* @info: an interval tree
*
* Free a list of MapInfo structures.
* Free a tree of MapInfo structures.
*/
void free_self_maps(GSList *info);
void free_self_maps(IntervalTreeRoot *root);
#endif /* SELFMAP_H */
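
As a side note (not part of the patch): a minimal sketch of how a caller walks the tree returned by the new interface, mirroring the iteration added to linux-user/syscall.c further down; the output format is illustrative only.

#include "qemu/osdep.h"
#include "qemu/selfmap.h"

/* Print every host mapping recorded by read_self_maps(). */
static void dump_self_maps(void)
{
    IntervalTreeRoot *root = read_self_maps();
    IntervalTreeNode *n;

    if (!root) {
        return;
    }
    for (n = interval_tree_iter_first(root, 0, -1); n;
         n = interval_tree_iter_next(n, 0, -1)) {
        MapInfo *e = container_of(n, MapInfo, itree);

        printf("%" PRIx64 "-%" PRIx64 " %c%c%c%c %s\n",
               e->itree.start, e->itree.last + 1,
               e->is_read ? 'r' : '-', e->is_write ? 'w' : '-',
               e->is_exec ? 'x' : '-', e->is_priv ? 'p' : 's',
               e->path ? e->path : "");
    }
    free_self_maps(root);
}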

View File

@ -4,6 +4,19 @@
#define TARGET_PROT_BTI 0x10
#define TARGET_PROT_MTE 0x20
/*
* arch/arm64/include/asm/processor.h:
*
* TASK_UNMAPPED_BASE DEFAULT_MAP_WINDOW / 4
* DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64
* DEFAULT_MAP_WINDOW_64 UL(1) << VA_BITS_MIN
* VA_BITS_MIN 48 (unless explicitly configured smaller)
*/
#define TASK_UNMAPPED_BASE (1ull << (48 - 2))
/* arch/arm64/include/asm/elf.h */
#define ELF_ET_DYN_BASE TARGET_PAGE_ALIGN((1ull << 48) / 3 * 2)
#include "../generic/target_mman.h"
#endif

View File

@ -20,6 +20,17 @@
#define TARGET_MS_SYNC 2
#define TARGET_MS_INVALIDATE 4
/*
* arch/alpha/include/asm/processor.h:
*
* TASK_UNMAPPED_BASE TASK_SIZE / 2
* TASK_SIZE 0x40000000000UL
*/
#define TASK_UNMAPPED_BASE 0x20000000000ull
/* arch/alpha/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
#include "../generic/target_mman.h"
#endif

View File

@ -1 +1,12 @@
/*
* arch/arm/include/asm/memory.h
* TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M)
* TASK_SIZE CONFIG_PAGE_OFFSET
* CONFIG_PAGE_OFFSET 0xC0000000 (default in Kconfig)
*/
#define TASK_UNMAPPED_BASE 0x40000000
/* arch/arm/include/asm/elf.h */
#define ELF_ET_DYN_BASE 0x00400000
#include "../generic/target_mman.h"

View File

@ -1 +1,13 @@
/*
* arch/cris/include/asm/processor.h:
* TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
*
* arch/cris/include/arch-v32/arch/processor.h
* TASK_SIZE 0xb0000000
*/
#define TASK_UNMAPPED_BASE TARGET_PAGE_ALIGN(0xb0000000 / 3)
/* arch/cris/include/uapi/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
#include "../generic/target_mman.h"

View File

@ -1960,15 +1960,6 @@ struct exec
#define ZMAGIC 0413
#define QMAGIC 0314
/* Necessary parameters */
#define TARGET_ELF_EXEC_PAGESIZE \
(((eppnt->p_align & ~qemu_host_page_mask) != 0) ? \
TARGET_PAGE_SIZE : MAX(qemu_host_page_size, TARGET_PAGE_SIZE))
#define TARGET_ELF_PAGELENGTH(_v) ROUND_UP((_v), TARGET_ELF_EXEC_PAGESIZE)
#define TARGET_ELF_PAGESTART(_v) ((_v) & \
~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1))
#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1))
#define DLINFO_ITEMS 16
static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
@ -2220,47 +2211,37 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
}
}
/* Map and zero the bss. We need to explicitly zero any fractional pages
after the data section (i.e. bss). */
static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
/**
* zero_bss:
*
* Map and zero the bss. We need to explicitly zero any fractional pages
* after the data section (i.e. bss). Return false on mapping failure.
*/
static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, int prot)
{
uintptr_t host_start, host_map_start, host_end;
abi_ulong align_bss;
last_bss = TARGET_PAGE_ALIGN(last_bss);
align_bss = TARGET_PAGE_ALIGN(start_bss);
end_bss = TARGET_PAGE_ALIGN(end_bss);
/* ??? There is confusion between qemu_real_host_page_size and
qemu_host_page_size here and elsewhere in target_mmap, which
may lead to the end of the data section mapping from the file
not being mapped. At least there was an explicit test and
comment for that here, suggesting that "the file size must
be known". The comment probably pre-dates the introduction
of the fstat system call in target_mmap which does in fact
find out the size. What isn't clear is if the workaround
here is still actually needed. For now, continue with it,
but merge it with the "normal" mmap that would allocate the bss. */
if (start_bss < align_bss) {
int flags = page_get_flags(start_bss);
host_start = (uintptr_t) g2h_untagged(elf_bss);
host_end = (uintptr_t) g2h_untagged(last_bss);
host_map_start = REAL_HOST_PAGE_ALIGN(host_start);
if (host_map_start < host_end) {
void *p = mmap((void *)host_map_start, host_end - host_map_start,
prot, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED) {
perror("cannot mmap brk");
exit(-1);
if (!(flags & PAGE_VALID)) {
/* Map the start of the bss. */
align_bss -= TARGET_PAGE_SIZE;
} else if (flags & PAGE_WRITE) {
/* The page is already mapped writable. */
memset(g2h_untagged(start_bss), 0, align_bss - start_bss);
} else {
/* Read-only zeros? */
g_assert_not_reached();
}
}
/* Ensure that the bss page(s) are valid */
if ((page_get_flags(last_bss-1) & prot) != prot) {
page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss - 1,
prot | PAGE_VALID);
}
if (host_start < host_map_start) {
memset((void *)host_start, 0, host_map_start - host_start);
}
return align_bss >= end_bss ||
target_mmap(align_bss, end_bss - align_bss, prot,
MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) != -1;
}
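
A worked example of the alignment logic above, using hypothetical addresses and an assumed 4 KiB TARGET_PAGE_SIZE (none of these values come from the patch):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE      0x1000u
#define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
    /* Hypothetical PT_LOAD: file-backed data ends at 0x412628,
     * the in-memory image (including bss) ends at 0x418f40. */
    uint32_t start_bss = 0x412628;
    uint32_t end_bss   = 0x418f40;

    uint32_t align_bss = PAGE_ALIGN(start_bss);   /* 0x413000 */
    end_bss = PAGE_ALIGN(end_bss);                /* 0x419000 */

    /* The fractional page [0x412628, 0x413000) is already mapped from
     * the file; if writable it is cleared with memset().  The whole
     * pages [0x413000, 0x419000) are mapped anonymously, and therefore
     * zeroed, by target_mmap(). */
    assert(align_bss == 0x413000);
    assert(end_bss == 0x419000);
    return 0;
}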
#if defined(TARGET_ARM)
@ -2523,6 +2504,157 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
#endif
#endif
/**
* pgb_try_mmap:
* @addr: host start address
* @addr_last: host last address
* @keep: do not unmap the probe region
*
* Return 1 if [@addr, @addr_last] is not mapped in the host,
* return 0 if it is not available to map, and -1 on mmap error.
* If @keep, the region is left mapped on success, otherwise unmapped.
*/
static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep)
{
size_t size = addr_last - addr + 1;
void *p = mmap((void *)addr, size, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE |
MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0);
int ret;
if (p == MAP_FAILED) {
return errno == EEXIST ? 0 : -1;
}
ret = p == (void *)addr;
if (!keep || !ret) {
munmap(p, size);
}
return ret;
}
/**
* pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t size, uintptr_t brk)
* @addr: host address
* @addr_last: host last address
* @brk: host brk
*
* Like pgb_try_mmap, but additionally reserve some memory following brk.
*/
static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last,
uintptr_t brk, bool keep)
{
uintptr_t brk_last = brk + 16 * MiB - 1;
/* Do not map anything close to the host brk. */
if (addr <= brk_last && brk <= addr_last) {
return 0;
}
return pgb_try_mmap(addr, addr_last, keep);
}
/**
* pgb_try_mmap_set:
* @ga: set of guest addrs
* @base: guest_base
* @brk: host brk
*
* Return true if all @ga can be mapped by the host at @base.
* On success, retain the mapping at index 0 for reserved_va.
*/
typedef struct PGBAddrs {
uintptr_t bounds[3][2]; /* start/last pairs */
int nbounds;
} PGBAddrs;
static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk)
{
for (int i = ga->nbounds - 1; i >= 0; --i) {
if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base,
ga->bounds[i][1] + base,
brk, i == 0 && reserved_va) <= 0) {
return false;
}
}
return true;
}
/**
* pgb_addr_set:
* @ga: output set of guest addrs
* @guest_loaddr: guest image low address
* @guest_hiaddr: guest image high address
* @identity: create for identity mapping
*
* Fill in @ga with the image, COMMPAGE and NULL page.
*/
static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr,
abi_ulong guest_hiaddr, bool try_identity)
{
int n;
/*
* With a low commpage, or a guest mapped very low,
* we may not be able to use the identity map.
*/
if (try_identity) {
if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) {
return false;
}
if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) {
return false;
}
}
memset(ga, 0, sizeof(*ga));
n = 0;
if (reserved_va) {
ga->bounds[n][0] = try_identity ? mmap_min_addr : 0;
ga->bounds[n][1] = reserved_va;
n++;
/* LO_COMMPAGE and NULL handled by reserving from 0. */
} else {
/* Add any LO_COMMPAGE or NULL page. */
if (LO_COMMPAGE != -1) {
ga->bounds[n][0] = 0;
ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1;
n++;
} else if (!try_identity) {
ga->bounds[n][0] = 0;
ga->bounds[n][1] = TARGET_PAGE_SIZE - 1;
n++;
}
/* Add the guest image for ET_EXEC. */
if (guest_loaddr) {
ga->bounds[n][0] = guest_loaddr;
ga->bounds[n][1] = guest_hiaddr;
n++;
}
}
/*
* Temporarily disable
* "comparison is always false due to limited range of data type"
* due to comparison between unsigned and (possible) 0.
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
/* Add any HI_COMMPAGE not covered by reserved_va. */
if (reserved_va < HI_COMMPAGE) {
ga->bounds[n][0] = HI_COMMPAGE & qemu_host_page_mask;
ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1;
n++;
}
#pragma GCC diagnostic pop
ga->nbounds = n;
return true;
}
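
To make the address set concrete, a hedged worked example; every constant here is an assumption for illustration (no LO_COMMPAGE, HI_COMMPAGE at 0xffff0f00, an ET_EXEC image spanning 0x10000-0x7ffff, reserved_va == 0, non-identity attempt, 4 KiB target and host pages):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Assumed parameters, not taken from any particular target. */
    const uint64_t hi_commpage = 0xffff0f00;
    const uint64_t page_size   = 0x1000;
    const uint64_t loaddr      = 0x10000, hiaddr = 0x7ffff;
    uint64_t bounds[3][2];
    int n = 0;

    /* No LO_COMMPAGE and not the identity map: reserve the NULL page. */
    bounds[n][0] = 0;
    bounds[n][1] = page_size - 1;                   /* 0x0 - 0xfff */
    n++;

    /* The ET_EXEC image itself. */
    bounds[n][0] = loaddr;                          /* 0x10000 - 0x7ffff */
    bounds[n][1] = hiaddr;
    n++;

    /* HI_COMMPAGE, not covered by reserved_va (which is 0 here). */
    bounds[n][0] = hi_commpage & ~(page_size - 1);  /* 0xffff0000 */
    bounds[n][1] = hi_commpage + page_size - 1;     /* 0xffff1eff */
    n++;

    assert(n == 3);
    assert(bounds[2][0] == 0xffff0000 && bounds[2][1] == 0xffff1eff);
    return 0;
}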
static void pgb_fail_in_use(const char *image_name)
{
error_report("%s: requires virtual address space that is in use "
@ -2531,19 +2663,171 @@ static void pgb_fail_in_use(const char *image_name)
exit(EXIT_FAILURE);
}
static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
abi_ulong guest_hiaddr, long align)
static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr,
uintptr_t guest_hiaddr, uintptr_t align)
{
const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
void *addr, *test;
PGBAddrs ga;
uintptr_t brk = (uintptr_t)sbrk(0);
if (!QEMU_IS_ALIGNED(guest_base, align)) {
fprintf(stderr, "Requested guest base %p does not satisfy "
"host minimum alignment (0x%lx)\n",
"host minimum alignment (0x%" PRIxPTR ")\n",
(void *)guest_base, align);
exit(EXIT_FAILURE);
}
if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base)
|| !pgb_try_mmap_set(&ga, guest_base, brk)) {
pgb_fail_in_use(image_name);
}
}
/**
* pgb_find_fallback:
*
* This is a fallback method for finding holes in the host address space
* if we don't have the benefit of being able to access /proc/self/map.
* It can potentially take a very long time as we can only dumbly iterate
* up the host address space seeing if the allocation would work.
*/
static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align,
uintptr_t brk)
{
/* TODO: come up with a better estimate of how much to skip. */
uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB;
for (uintptr_t base = skip; ; base += skip) {
base = ROUND_UP(base, align);
if (pgb_try_mmap_set(ga, base, brk)) {
return base;
}
if (base >= -skip) {
return -1;
}
}
}
static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base,
IntervalTreeRoot *root)
{
for (int i = ga->nbounds - 1; i >= 0; --i) {
uintptr_t s = base + ga->bounds[i][0];
uintptr_t l = base + ga->bounds[i][1];
IntervalTreeNode *n;
if (l < s) {
/* Wraparound. Skip to advance S to mmap_min_addr. */
return mmap_min_addr - s;
}
n = interval_tree_iter_first(root, s, l);
if (n != NULL) {
/* Conflict. Skip to advance S to LAST + 1. */
return n->last - s + 1;
}
}
return 0; /* success */
}
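
As an illustration of the skip computation (all numbers hypothetical): if the probe interval for one bound at the current base is [s, l] = [0x10000, 0x7ffff] and an existing host mapping in the tree overlaps it and extends up to last = 0x34fff, pgb_try_itree returns 0x34fff - 0x10000 + 1 = 0x25000. The caller below then retries with the base advanced by that amount (and re-aligned), which moves s to 0x35000, just past the conflicting mapping.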
static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root,
uintptr_t align, uintptr_t brk)
{
uintptr_t last = mmap_min_addr;
uintptr_t base, skip;
while (true) {
base = ROUND_UP(last, align);
if (base < last) {
return -1;
}
skip = pgb_try_itree(ga, base, root);
if (skip == 0) {
break;
}
last = base + skip;
if (last < base) {
return -1;
}
}
/*
* We've chosen 'base' based on holes in the interval tree,
* but we don't yet know if it is a valid host address.
* Because it is the first matching hole, if the host addresses
* are invalid we know there are no further matches.
*/
return pgb_try_mmap_set(ga, base, brk) ? base : -1;
}
static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr,
uintptr_t guest_hiaddr, uintptr_t align)
{
IntervalTreeRoot *root;
uintptr_t brk, ret;
PGBAddrs ga;
assert(QEMU_IS_ALIGNED(guest_loaddr, align));
/* Try the identity map first. */
if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) {
brk = (uintptr_t)sbrk(0);
if (pgb_try_mmap_set(&ga, 0, brk)) {
guest_base = 0;
return;
}
}
/*
* Rebuild the address set for non-identity map.
* This differs in the mapping of the guest NULL page.
*/
pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false);
root = read_self_maps();
/* Read brk after we've read the maps, which will malloc. */
brk = (uintptr_t)sbrk(0);
if (!root) {
ret = pgb_find_fallback(&ga, align, brk);
} else {
/*
* Reserve the area close to the host brk.
* This will be freed with the rest of the tree.
*/
IntervalTreeNode *b = g_new0(IntervalTreeNode, 1);
b->start = brk;
b->last = brk + 16 * MiB - 1;
interval_tree_insert(b, root);
ret = pgb_find_itree(&ga, root, align, brk);
free_self_maps(root);
}
if (ret == -1) {
int w = TARGET_LONG_BITS / 4;
error_report("%s: Unable to find a guest_base to satisfy all "
"guest address mapping requirements", image_name);
for (int i = 0; i < ga.nbounds; ++i) {
error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n",
w, (uint64_t)ga.bounds[i][0],
w, (uint64_t)ga.bounds[i][1]);
}
exit(EXIT_FAILURE);
}
guest_base = ret;
}
void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
abi_ulong guest_hiaddr)
{
/* In order to use host shmat, we must be able to honor SHMLBA. */
uintptr_t align = MAX(SHMLBA, qemu_host_page_size);
/* Sanity check the guest binary. */
if (reserved_va) {
if (guest_hiaddr > reserved_va) {
@ -2553,305 +2837,24 @@ static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
exit(EXIT_FAILURE);
}
} else {
#if HOST_LONG_BITS < TARGET_ABI_BITS
if ((guest_hiaddr - guest_base) > ~(uintptr_t)0) {
if (guest_hiaddr != (uintptr_t)guest_hiaddr) {
error_report("%s: requires more virtual address space "
"than the host can provide (0x%" PRIx64 ")",
image_name, (uint64_t)guest_hiaddr + 1 - guest_base);
image_name, (uint64_t)guest_hiaddr + 1);
exit(EXIT_FAILURE);
}
#endif
}
/*
* Expand the allocation to the entire reserved_va.
* Exclude the mmap_min_addr hole.
*/
if (reserved_va) {
guest_loaddr = (guest_base >= mmap_min_addr ? 0
: mmap_min_addr - guest_base);
guest_hiaddr = reserved_va;
}
/* Reserve the address space for the binary, or reserved_va. */
test = g2h_untagged(guest_loaddr);
addr = mmap(test, guest_hiaddr - guest_loaddr + 1, PROT_NONE, flags, -1, 0);
if (test != addr) {
pgb_fail_in_use(image_name);
}
qemu_log_mask(CPU_LOG_PAGE,
"%s: base @ %p for %" PRIu64 " bytes\n",
__func__, addr, (uint64_t)guest_hiaddr - guest_loaddr + 1);
}
/**
* pgd_find_hole_fallback: potential mmap address
* @guest_size: size of available space
* @brk: location of break
* @align: memory alignment
*
* This is a fallback method for finding a hole in the host address
* space if we don't have the benefit of being able to access
* /proc/self/map. It can potentially take a very long time as we can
* only dumbly iterate up the host address space seeing if the
* allocation would work.
*/
static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk,
long align, uintptr_t offset)
{
uintptr_t base;
/* Start (aligned) at the bottom and work our way up */
base = ROUND_UP(mmap_min_addr, align);
while (true) {
uintptr_t align_start, end;
align_start = ROUND_UP(base, align);
end = align_start + guest_size + offset;
/* if brk is anywhere in the range give ourselves some room to grow. */
if (align_start <= brk && brk < end) {
base = brk + (16 * MiB);
continue;
} else if (align_start + guest_size < align_start) {
/* we have run out of space */
return -1;
} else {
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE |
MAP_FIXED_NOREPLACE;
void * mmap_start = mmap((void *) align_start, guest_size,
PROT_NONE, flags, -1, 0);
if (mmap_start != MAP_FAILED) {
munmap(mmap_start, guest_size);
if (mmap_start == (void *) align_start) {
qemu_log_mask(CPU_LOG_PAGE,
"%s: base @ %p for %" PRIdPTR" bytes\n",
__func__, mmap_start + offset, guest_size);
return (uintptr_t) mmap_start + offset;
}
}
base += qemu_host_page_size;
}
}
}
/* Return value for guest_base, or -1 if no hole found. */
static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size,
long align, uintptr_t offset)
{
GSList *maps, *iter;
uintptr_t this_start, this_end, next_start, brk;
intptr_t ret = -1;
assert(QEMU_IS_ALIGNED(guest_loaddr, align));
maps = read_self_maps();
/* Read brk after we've read the maps, which will malloc. */
brk = (uintptr_t)sbrk(0);
if (!maps) {
return pgd_find_hole_fallback(guest_size, brk, align, offset);
}
/* The first hole is before the first map entry. */
this_start = mmap_min_addr;
for (iter = maps; iter;
this_start = next_start, iter = g_slist_next(iter)) {
uintptr_t align_start, hole_size;
this_end = ((MapInfo *)iter->data)->start;
next_start = ((MapInfo *)iter->data)->end;
align_start = ROUND_UP(this_start + offset, align);
/* Skip holes that are too small. */
if (align_start >= this_end) {
continue;
}
hole_size = this_end - align_start;
if (hole_size < guest_size) {
continue;
}
/* If this hole contains brk, give ourselves some room to grow. */
if (this_start <= brk && brk < this_end) {
hole_size -= guest_size;
if (sizeof(uintptr_t) == 8 && hole_size >= 1 * GiB) {
align_start += 1 * GiB;
} else if (hole_size >= 16 * MiB) {
align_start += 16 * MiB;
} else {
align_start = (this_end - guest_size) & -align;
if (align_start < this_start) {
continue;
}
}
}
/* Record the lowest successful match. */
if (ret < 0) {
ret = align_start;
}
/* If this hole contains the identity map, select it. */
if (align_start <= guest_loaddr &&
guest_loaddr + guest_size <= this_end) {
ret = 0;
}
/* If this hole ends above the identity map, stop looking. */
if (this_end >= guest_loaddr) {
break;
}
}
free_self_maps(maps);
if (ret != -1) {
qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %" PRIxPTR
" for %" PRIuPTR " bytes\n",
__func__, ret, guest_size);
}
return ret;
}
static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
abi_ulong orig_hiaddr, long align)
{
uintptr_t loaddr = orig_loaddr;
uintptr_t hiaddr = orig_hiaddr;
uintptr_t offset = 0;
uintptr_t addr;
if (hiaddr != orig_hiaddr) {
error_report("%s: requires virtual address space that the "
"host cannot provide (0x%" PRIx64 ")",
image_name, (uint64_t)orig_hiaddr + 1);
exit(EXIT_FAILURE);
}
loaddr &= -align;
if (HI_COMMPAGE) {
/*
* Extend the allocation to include the commpage.
* For a 64-bit host, this is just 4GiB; for a 32-bit host we
* need to ensure there is space bellow the guest_base so we
* can map the commpage in the place needed when the address
* arithmetic wraps around.
*/
if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) {
hiaddr = UINT32_MAX;
} else {
offset = -(HI_COMMPAGE & -align);
}
} else if (LO_COMMPAGE != -1) {
loaddr = MIN(loaddr, LO_COMMPAGE & -align);
}
addr = pgb_find_hole(loaddr, hiaddr - loaddr + 1, align, offset);
if (addr == -1) {
/*
* If HI_COMMPAGE, there *might* be a non-consecutive allocation
* that can satisfy both. But as the normal arm32 link base address
* is ~32k, and we extend down to include the commpage, making the
* overhead only ~96k, this is unlikely.
*/
error_report("%s: Unable to allocate %#zx bytes of "
"virtual address space", image_name,
(size_t)(hiaddr - loaddr));
exit(EXIT_FAILURE);
}
guest_base = addr;
qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %"PRIxPTR" for %" PRIuPTR" bytes\n",
__func__, addr, hiaddr - loaddr);
}
static void pgb_dynamic(const char *image_name, long align)
{
/*
* The executable is dynamic and does not require a fixed address.
* All we need is a commpage that satisfies align.
* If we do not need a commpage, leave guest_base == 0.
*/
if (HI_COMMPAGE) {
uintptr_t addr, commpage;
/* 64-bit hosts should have used reserved_va. */
assert(sizeof(uintptr_t) == 4);
/*
* By putting the commpage at the first hole, that puts guest_base
* just above that, and maximises the positive guest addresses.
*/
commpage = HI_COMMPAGE & -align;
addr = pgb_find_hole(commpage, -commpage, align, 0);
assert(addr != -1);
guest_base = addr;
}
}
static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
abi_ulong guest_hiaddr, long align)
{
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
void *addr, *test;
if (guest_hiaddr > reserved_va) {
error_report("%s: requires more than reserved virtual "
"address space (0x%" PRIx64 " > 0x%lx)",
image_name, (uint64_t)guest_hiaddr, reserved_va);
exit(EXIT_FAILURE);
}
/* Widen the "image" to the entire reserved address space. */
pgb_static(image_name, 0, reserved_va, align);
/* osdep.h defines this as 0 if it's missing */
flags |= MAP_FIXED_NOREPLACE;
/* Reserve the memory on the host. */
assert(guest_base != 0);
test = g2h_untagged(0);
addr = mmap(test, reserved_va + 1, PROT_NONE, flags, -1, 0);
if (addr == MAP_FAILED || addr != test) {
error_report("Unable to reserve 0x%lx bytes of virtual address "
"space at %p (%s) for use as guest address space (check your "
"virtual memory ulimit setting, mmap_min_addr or reserve less "
"using qemu-user's -R option)",
reserved_va + 1, test, strerror(errno));
exit(EXIT_FAILURE);
}
qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n",
__func__, addr, reserved_va + 1);
}
void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
abi_ulong guest_hiaddr)
{
/* In order to use host shmat, we must be able to honor SHMLBA. */
uintptr_t align = MAX(SHMLBA, qemu_host_page_size);
if (have_guest_base) {
pgb_have_guest_base(image_name, guest_loaddr, guest_hiaddr, align);
} else if (reserved_va) {
pgb_reserved_va(image_name, guest_loaddr, guest_hiaddr, align);
} else if (guest_loaddr) {
pgb_static(image_name, guest_loaddr, guest_hiaddr, align);
pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align);
} else {
pgb_dynamic(image_name, align);
pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align);
}
/* Reserve and initialize the commpage. */
if (!init_guest_commpage()) {
/*
* With have_guest_base, the user has selected the address and
* we are trying to work with that. Otherwise, we have selected
* free space and init_guest_commpage must succeeded.
*/
assert(have_guest_base);
pgb_fail_in_use(image_name);
/* We have already probed for the commpage being free. */
g_assert_not_reached();
}
assert(QEMU_IS_ALIGNED(guest_base, align));
@ -3107,28 +3110,9 @@ static void load_elf_image(const char *image_name, int image_fd,
}
}
if (pinterp_name != NULL) {
/*
* This is the main executable.
*
* Reserve extra space for brk.
* We hold on to this space while placing the interpreter
* and the stack, lest they be placed immediately after
* the data segment and block allocation from the brk.
*
* 16MB is chosen as "large enough" without being so large as
* to allow the result to not fit with a 32-bit guest on a
* 32-bit host. However some 64 bit guests (e.g. s390x)
* attempt to place their heap further ahead and currently
* nothing stops them smashing into QEMUs address space.
*/
#if TARGET_LONG_BITS == 64
info->reserve_brk = 32 * MiB;
#else
info->reserve_brk = 16 * MiB;
#endif
hiaddr += info->reserve_brk;
load_addr = loaddr;
if (pinterp_name != NULL) {
if (ehdr->e_type == ET_EXEC) {
/*
* Make sure that the low address does not conflict with
@ -3136,31 +3120,55 @@ static void load_elf_image(const char *image_name, int image_fd,
*/
probe_guest_base(image_name, loaddr, hiaddr);
} else {
abi_ulong align;
/*
* The binary is dynamic, but we still need to
* select guest_base. In this case we pass a size.
*/
probe_guest_base(image_name, 0, hiaddr - loaddr);
/*
* Avoid collision with the loader by providing a different
* default load address.
*/
load_addr += elf_et_dyn_base;
/*
* TODO: Better support for mmap alignment is desirable.
* Since we do not have complete control over the guest
* address space, we prefer the kernel to choose some address
* rather than force the use of LOAD_ADDR via MAP_FIXED.
* But without MAP_FIXED we cannot guarantee alignment,
* only suggest it.
*/
align = pow2ceil(info->alignment);
if (align) {
load_addr &= -align;
}
}
}
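
A hedged numeric illustration of the mmap hint computed above; the addresses are assumptions, not values from any real target:

#include <assert.h>
#include <stdint.h>

/* Simplified stand-in for pow2ceil(): smallest power of two >= v. */
static uint64_t pow2ceil(uint64_t v)
{
    uint64_t p = 1;
    while (p < v) {
        p <<= 1;
    }
    return p;
}

int main(void)
{
    /* Assumed: an ET_DYN image linked at 0 with 64 KiB p_align,
     * and a hypothetical elf_et_dyn_base. */
    uint64_t loaddr = 0;
    uint64_t elf_et_dyn_base = 0x555556000ull;
    uint64_t align = pow2ceil(0x10000);        /* 0x10000 */

    uint64_t load_addr = loaddr + elf_et_dyn_base;
    if (align) {
        load_addr &= -align;                   /* round down to 0x555550000 */
    }
    assert(load_addr == 0x555550000ull);
    return 0;
}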
/*
* Reserve address space for all of this.
*
* In the case of ET_EXEC, we supply MAP_FIXED so that we get
* exactly the address range that is required.
* In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get
* exactly the address range that is required. Without reserved_va,
* the guest address space is not isolated. We have attempted to avoid
* conflict with the host program itself via probe_guest_base, but using
* MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check.
*
* Otherwise this is ET_DYN, and we are searching for a location
* that can hold the memory space required. If the image is
* pre-linked, LOADDR will be non-zero, and the kernel should
* pre-linked, LOAD_ADDR will be non-zero, and the kernel should
* honor that address if it happens to be free.
*
* In both cases, we will overwrite pages in this range with mappings
* from the executable.
*/
load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
load_addr = target_mmap(load_addr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
(ehdr->e_type == ET_EXEC ? MAP_FIXED : 0),
(ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0),
-1, 0);
if (load_addr == -1) {
goto exit_mmap;
@ -3195,7 +3203,8 @@ static void load_elf_image(const char *image_name, int image_fd,
info->end_code = 0;
info->start_data = -1;
info->end_data = 0;
info->brk = 0;
/* Usual start for brk is after all sections of the main executable. */
info->brk = TARGET_PAGE_ALIGN(hiaddr);
info->elf_flags = ehdr->e_flags;
prot_exec = PROT_EXEC;
@ -3221,7 +3230,7 @@ static void load_elf_image(const char *image_name, int image_fd,
for (i = 0; i < ehdr->e_phnum; i++) {
struct elf_phdr *eppnt = phdr + i;
if (eppnt->p_type == PT_LOAD) {
abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em, vaddr_len;
abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em;
int elf_prot = 0;
if (eppnt->p_flags & PF_R) {
@ -3235,8 +3244,8 @@ static void load_elf_image(const char *image_name, int image_fd,
}
vaddr = load_bias + eppnt->p_vaddr;
vaddr_po = TARGET_ELF_PAGEOFFSET(vaddr);
vaddr_ps = TARGET_ELF_PAGESTART(vaddr);
vaddr_po = vaddr & ~TARGET_PAGE_MASK;
vaddr_ps = vaddr & TARGET_PAGE_MASK;
vaddr_ef = vaddr + eppnt->p_filesz;
vaddr_em = vaddr + eppnt->p_memsz;
@ -3246,30 +3255,18 @@ static void load_elf_image(const char *image_name, int image_fd,
* but no backing file segment.
*/
if (eppnt->p_filesz != 0) {
vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po);
error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
MAP_PRIVATE | MAP_FIXED,
error = target_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po,
elf_prot, MAP_PRIVATE | MAP_FIXED,
image_fd, eppnt->p_offset - vaddr_po);
if (error == -1) {
goto exit_mmap;
}
}
/*
* If the load segment requests extra zeros (e.g. bss), map it.
*/
if (eppnt->p_filesz < eppnt->p_memsz) {
zero_bss(vaddr_ef, vaddr_em, elf_prot);
}
} else if (eppnt->p_memsz != 0) {
vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_memsz + vaddr_po);
error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS,
-1, 0);
if (error == -1) {
goto exit_mmap;
}
/* If the load segment requests extra zeros (e.g. bss), map it. */
if (vaddr_ef < vaddr_em &&
!zero_bss(vaddr_ef, vaddr_em, elf_prot)) {
goto exit_mmap;
}
/* Find the full program boundaries. */
@ -3289,9 +3286,6 @@ static void load_elf_image(const char *image_name, int image_fd,
info->end_data = vaddr_ef;
}
}
if (vaddr_em > info->brk) {
info->brk = vaddr_em;
}
#ifdef TARGET_MIPS
} else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
Mips_elf_abiflags_v0 abiflags;
@ -3620,6 +3614,19 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
if (elf_interpreter) {
load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
/*
* While unusual because of ELF_ET_DYN_BASE, if we are unlucky
* with the mappings the interpreter can be loaded above but
* near the main executable, which can leave very little room
* for the heap.
* If the current brk has less than 16MB, use the end of the
* interpreter.
*/
if (interp_info.brk > info->brk &&
interp_info.load_bias - info->brk < 16 * MiB) {
info->brk = interp_info.brk;
}
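
For illustration, a small standalone check of the condition above with hypothetical addresses:

#include <assert.h>
#include <stdint.h>

#define MiB (1024 * 1024)

int main(void)
{
    /* Hypothetical layout: the executable's brk would start at
     * 0x5500001000, but the interpreter was mapped at 0x5500200000
     * and its own end-of-image is 0x5500234000. */
    uint64_t exec_brk         = 0x5500001000ull;
    uint64_t interp_load_bias = 0x5500200000ull;
    uint64_t interp_brk       = 0x5500234000ull;

    /* Only ~2 MiB of room before the interpreter: move brk above it. */
    if (interp_brk > exec_brk &&
        interp_load_bias - exec_brk < 16 * MiB) {
        exec_brk = interp_brk;
    }
    assert(exec_brk == interp_brk);
    return 0;
}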
/* If the program interpreter is one of these two, then assume
an iBCS2 image. Otherwise assume a native linux image. */
@ -3673,17 +3680,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
bprm->core_dump = &elf_core_dump;
#endif
/*
* If we reserved extra space for brk, release it now.
* The implementation of do_brk in syscalls.c expects to be able
* to mmap pages in this space.
*/
if (info->reserve_brk) {
abi_ulong start_brk = TARGET_PAGE_ALIGN(info->brk);
abi_ulong end_brk = TARGET_PAGE_ALIGN(info->brk + info->reserve_brk);
target_munmap(start_brk, end_brk - start_brk);
}
return 0;
}

View File

@ -1 +1,14 @@
/*
* arch/hexagon/include/asm/processor.h
* TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
*
* arch/hexagon/include/asm/mem-layout.h
* TASK_SIZE PAGE_OFFSET
* PAGE_OFFSET 0xc0000000
*/
#define TASK_UNMAPPED_BASE 0x40000000
/* arch/hexagon/include/asm/elf.h */
#define ELF_ET_DYN_BASE 0x08000000
#include "../generic/target_mman.h"

View File

@ -24,6 +24,12 @@
#define TARGET_MS_ASYNC 2
#define TARGET_MS_INVALIDATE 4
/* arch/parisc/include/asm/processor.h: DEFAULT_MAP_BASE32 */
#define TASK_UNMAPPED_BASE 0x40000000
/* arch/parisc/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000)
#include "../generic/target_mman.h"
#endif

View File

@ -1 +1,17 @@
/*
* arch/x86/include/asm/processor.h:
* TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
* __TASK_UNMAPPED_BASE(S) PAGE_ALIGN(S / 3)
*
* arch/x86/include/asm/page_32_types.h:
* TASK_SIZE_LOW TASK_SIZE
* TASK_SIZE __PAGE_OFFSET
* __PAGE_OFFSET CONFIG_PAGE_OFFSET
* CONFIG_PAGE_OFFSET 0xc0000000 (default in Kconfig)
*/
#define TASK_UNMAPPED_BASE 0x40000000
/* arch/x86/include/asm/elf.h */
#define ELF_ET_DYN_BASE 0x00400000
#include "../generic/target_mman.h"

View File

@ -1 +1,12 @@
/*
* arch/loongarch/include/asm/processor.h:
* TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
* TASK_SIZE64 0x1UL << (... ? VA_BITS : ...)
*/
#define TASK_UNMAPPED_BASE \
TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
/* arch/loongarch/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
#include "../generic/target_mman.h"

View File

@ -1 +1,6 @@
/* arch/m68k/include/asm/processor.h */
#define TASK_UNMAPPED_BASE 0xC0000000
/* arch/m68k/include/asm/elf.h */
#define ELF_ET_DYN_BASE 0xD0000000
#include "../generic/target_mman.h"

View File

@ -821,6 +821,49 @@ int main(int argc, char **argv, char **envp)
reserved_va = max_reserved_va;
}
/*
* Temporarily disable
* "comparison is always false due to limited range of data type"
* due to comparison between (possible) uint64_t and uintptr_t.
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
/*
* Select an initial value for task_unmapped_base that is in range.
*/
if (reserved_va) {
if (TASK_UNMAPPED_BASE < reserved_va) {
task_unmapped_base = TASK_UNMAPPED_BASE;
} else {
/* The most common default formula is TASK_SIZE / 3. */
task_unmapped_base = TARGET_PAGE_ALIGN(reserved_va / 3);
}
} else if (TASK_UNMAPPED_BASE < UINTPTR_MAX) {
task_unmapped_base = TASK_UNMAPPED_BASE;
} else {
/* 32-bit host: pick something medium size. */
task_unmapped_base = 0x10000000;
}
mmap_next_start = task_unmapped_base;
/* Similarly for elf_et_dyn_base. */
if (reserved_va) {
if (ELF_ET_DYN_BASE < reserved_va) {
elf_et_dyn_base = ELF_ET_DYN_BASE;
} else {
/* The most common default formula is TASK_SIZE / 3 * 2. */
elf_et_dyn_base = TARGET_PAGE_ALIGN(reserved_va / 3) * 2;
}
} else if (ELF_ET_DYN_BASE < UINTPTR_MAX) {
elf_et_dyn_base = ELF_ET_DYN_BASE;
} else {
/* 32-bit host: pick something medium size. */
elf_et_dyn_base = 0x18000000;
}
#pragma GCC diagnostic pop
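
A worked example of the clamping above, assuming a 64-bit guest whose native TASK_UNMAPPED_BASE and ELF_ET_DYN_BASE both exceed a 4 GiB reserved_va (for instance when running with -R 0xffffffff); the macros below are local stand-ins assuming 4 KiB target pages:

#include <assert.h>
#include <stdint.h>

#define TARGET_PAGE_SIZE      0x1000ull
#define TARGET_PAGE_ALIGN(x)  (((x) + TARGET_PAGE_SIZE - 1) & ~(TARGET_PAGE_SIZE - 1))

int main(void)
{
    uint64_t reserved_va = 0xffffffffull;   /* -R 0xffffffff */
    uint64_t task_unmapped_base, elf_et_dyn_base;

    /* Both native values are >= reserved_va, so fall back to the
     * common TASK_SIZE / 3 and TASK_SIZE / 3 * 2 formulas. */
    task_unmapped_base = TARGET_PAGE_ALIGN(reserved_va / 3);      /* 0x55556000 */
    elf_et_dyn_base    = TARGET_PAGE_ALIGN(reserved_va / 3) * 2;  /* 0xaaaac000 */

    assert(task_unmapped_base == 0x55556000ull);
    assert(elf_et_dyn_base    == 0xaaaac000ull);
    return 0;
}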
{
Error *err = NULL;
if (seed_optarg != NULL) {

View File

@ -1 +1,12 @@
/*
* arch/microblaze/include/asm/processor.h:
* TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
* TASK_SIZE CONFIG_KERNEL_START
* CONFIG_KERNEL_START 0xc0000000 (default in Kconfig)
*/
#define TASK_UNMAPPED_BASE 0x48000000
/* arch/microblaze/include/uapi/asm/elf.h */
#define ELF_ET_DYN_BASE 0x08000000
#include "../generic/target_mman.h"

View File

@ -14,6 +14,16 @@
#define TARGET_MAP_STACK 0x40000
#define TARGET_MAP_HUGETLB 0x80000
/*
* arch/mips/include/asm/processor.h:
* TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
*/
#define TASK_UNMAPPED_BASE \
TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
/* arch/mips/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
#include "../generic/target_mman.h"
#endif

View File

@ -299,20 +299,9 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
return true;
}
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE 0x5500000000
#else
# define TASK_UNMAPPED_BASE (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE 0xfa000000
#else
# define TASK_UNMAPPED_BASE 0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;
/*
* Subroutine of mmap_find_vma, used when we have pre-allocated
@ -391,7 +380,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
if ((addr & (align - 1)) == 0) {
/* Success. */
if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
if (start == mmap_next_start && addr >= task_unmapped_base) {
mmap_next_start = addr + size;
}
return addr;

View File

@ -1 +1,11 @@
/*
* arch/nios2/include/asm/processor.h:
* TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
* TASK_SIZE 0x7FFF0000UL
*/
#define TASK_UNMAPPED_BASE TARGET_PAGE_ALIGN(0x7FFF0000 / 3)
/* arch/nios2/include/asm/elf.h */
#define ELF_ET_DYN_BASE 0xD0000000
#include "../generic/target_mman.h"

View File

@ -1 +1,11 @@
/*
* arch/openrisc/include/asm/processor.h:
* TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
* TASK_SIZE (0x80000000UL)
*/
#define TASK_UNMAPPED_BASE 0x30000000
/* arch/openrisc/include/asm/elf.h */
#define ELF_ET_DYN_BASE 0x08000000
#include "../generic/target_mman.h"

View File

@ -4,6 +4,26 @@
#define TARGET_MAP_NORESERVE 0x40
#define TARGET_MAP_LOCKED 0x80
/*
* arch/powerpc/include/asm/task_size_64.h
* TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
* TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
* TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
* DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB (with 4k pages)
*/
#ifdef TARGET_PPC64
#define TASK_UNMAPPED_BASE 0x0000100000000000ull
#else
#define TASK_UNMAPPED_BASE 0x40000000
#endif
/* arch/powerpc/include/asm/elf.h */
#ifdef TARGET_PPC64
#define ELF_ET_DYN_BASE 0x100000000ull
#else
#define ELF_ET_DYN_BASE 0x000400000
#endif
#include "../generic/target_mman.h"
#endif

View File

@ -30,7 +30,6 @@ struct image_info {
abi_ulong start_data;
abi_ulong end_data;
abi_ulong brk;
abi_ulong reserve_brk;
abi_ulong start_mmap;
abi_ulong start_stack;
abi_ulong stack_limit;

View File

@ -1 +1,11 @@
/*
* arch/riscv/include/asm/processor.h:
* TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
*/
#define TASK_UNMAPPED_BASE \
TARGET_PAGE_ALIGN((1ull << (TARGET_VIRT_ADDR_SPACE_BITS - 1)) / 3)
/* arch/riscv/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
#include "../generic/target_mman.h"

View File

@ -1 +1,21 @@
/*
* arch/s390/include/asm/processor.h:
* TASK_UNMAPPED_BASE (... : (_REGION2_SIZE >> 1))
*
* arch/s390/include/asm/pgtable.h:
* _REGION2_SIZE (1UL << _REGION2_SHIFT)
* _REGION2_SHIFT 42
*/
#define TASK_UNMAPPED_BASE (1ull << 41)
/*
* arch/s390/include/asm/elf.h:
* ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)
*
* arch/s390/include/asm/processor.h:
* STACK_TOP VDSO_LIMIT - VDSO_SIZE - PAGE_SIZE
* VDSO_LIMIT _REGION2_SIZE
*/
#define ELF_ET_DYN_BASE (((1ull << 42) / 3 * 2) & ~0xffffffffull)
#include "../generic/target_mman.h"

View File

@ -1 +1,8 @@
/* arch/sh/include/asm/processor_32.h */
#define TASK_UNMAPPED_BASE \
TARGET_PAGE_ALIGN((1u << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
/* arch/sh/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
#include "../generic/target_mman.h"

View File

@ -5,6 +5,31 @@
#define TARGET_MAP_LOCKED 0x100
#define TARGET_MAP_GROWSDOWN 0x0200
/*
* arch/sparc/include/asm/page_64.h:
* TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
* _AC(0x0000000070000000,UL) : \
* VA_EXCLUDE_END)
* But VA_EXCLUDE_END is > 0xffff800000000000UL which doesn't work
* in userland emulation.
*/
#ifdef TARGET_ABI32
#define TASK_UNMAPPED_BASE 0x70000000
#else
#define TASK_UNMAPPED_BASE (1ull << (TARGET_VIRT_ADDR_SPACE_BITS - 2))
#endif
/*
* arch/sparc/include/asm/elf_64.h
* Except that COMPAT_ELF_ET_DYN_BASE exactly matches TASK_UNMAPPED_BASE,
* so move it up a bit.
*/
#ifdef TARGET_ABI32
#define ELF_ET_DYN_BASE 0x78000000
#else
#define ELF_ET_DYN_BASE 0x0000010000000000ull
#endif
#include "../generic/target_mman.h"
#endif

View File

@ -8070,16 +8070,17 @@ static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps)
{
CPUState *cpu = env_cpu(cpu_env);
TaskState *ts = cpu->opaque;
GSList *map_info = read_self_maps();
GSList *s;
IntervalTreeRoot *map_info = read_self_maps();
IntervalTreeNode *s;
int count;
for (s = map_info; s; s = g_slist_next(s)) {
MapInfo *e = (MapInfo *) s->data;
for (s = interval_tree_iter_first(map_info, 0, -1); s;
s = interval_tree_iter_next(s, 0, -1)) {
MapInfo *e = container_of(s, MapInfo, itree);
if (h2g_valid(e->start)) {
unsigned long min = e->start;
unsigned long max = e->end;
if (h2g_valid(e->itree.start)) {
unsigned long min = e->itree.start;
unsigned long max = e->itree.last + 1;
int flags = page_get_flags(h2g(min));
const char *path;

View File

@ -18,6 +18,34 @@
#ifndef LINUX_USER_USER_MMAP_H
#define LINUX_USER_USER_MMAP_H
/*
* Guest parameters for the ADDR_COMPAT_LAYOUT personality
* (at present this is the only layout supported by QEMU).
*
* TASK_UNMAPPED_BASE: For mmap without hint (addr != 0), the search
* for unused virtual memory begins at TASK_UNMAPPED_BASE.
*
* ELF_ET_DYN_BASE: When the executable is ET_DYN (i.e. PIE), and requires
* an interpreter (i.e. not -static-pie), use ELF_ET_DYN_BASE instead of
* TASK_UNMAPPED_BASE for selecting the address of the executable.
* This provides some distance between the executable and the interpreter,
* which allows the initial brk to be placed immediately after the
* executable and also have room to grow.
*
* task_unmapped_base, elf_et_dyn_base: When the guest address space is
* limited via -R, the values of TASK_UNMAPPED_BASE and ELF_ET_DYN_BASE
* must be adjusted to fit.
*/
extern abi_ulong task_unmapped_base;
extern abi_ulong elf_et_dyn_base;
/*
* mmap_next_start: The base address for the next mmap without hint,
* increased after each successful map, starting at task_unmapped_base.
* This is an optimization within QEMU and not part of ADDR_COMPAT_LAYOUT.
*/
extern abi_ulong mmap_next_start;
int target_mprotect(abi_ulong start, abi_ulong len, int prot);
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
int flags, int fd, off_t offset);
@ -26,7 +54,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
abi_ulong new_size, unsigned long flags,
abi_ulong new_addr);
abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice);
extern abi_ulong mmap_next_start;
abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
void mmap_fork_start(void);
void mmap_fork_end(int child);

View File

@ -1 +1,16 @@
/*
* arch/x86/include/asm/processor.h:
* TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
* __TASK_UNMAPPED_BASE(S) PAGE_ALIGN(S / 3)
*
* arch/x86/include/asm/page_64_types.h:
* TASK_SIZE_LOW DEFAULT_MAP_WINDOW
* DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
*/
#define TASK_UNMAPPED_BASE \
TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
/* arch/x86/include/asm/elf.h */
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
#include "../generic/target_mman.h"

View File

@ -14,6 +14,16 @@
#define TARGET_MAP_STACK 0x40000
#define TARGET_MAP_HUGETLB 0x80000
/*
* arch/xtensa/include/asm/processor.h:
* TASK_UNMAPPED_BASE (TASK_SIZE / 2)
*/
#define TASK_UNMAPPED_BASE (1u << (TARGET_VIRT_ADDR_SPACE_BITS - 1))
/* arch/xtensa/include/asm/elf.h */
#define ELF_ET_DYN_BASE \
TARGET_PAGE_ALIGN((1u << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
#include "../generic/target_mman.h"
#endif

View File

@ -10,74 +10,98 @@
#include "qemu/cutils.h"
#include "qemu/selfmap.h"
GSList *read_self_maps(void)
IntervalTreeRoot *read_self_maps(void)
{
gchar *maps;
GSList *map_info = NULL;
IntervalTreeRoot *root;
gchar *maps, **lines;
guint i, nlines;
if (g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
gchar **lines = g_strsplit(maps, "\n", 0);
int i, entries = g_strv_length(lines);
if (!g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
return NULL;
}
for (i = 0; i < entries; i++) {
gchar **fields = g_strsplit(lines[i], " ", 6);
if (g_strv_length(fields) > 4) {
MapInfo *e = g_new0(MapInfo, 1);
int errors = 0;
const char *end;
root = g_new0(IntervalTreeRoot, 1);
lines = g_strsplit(maps, "\n", 0);
nlines = g_strv_length(lines);
errors |= qemu_strtoul(fields[0], &end, 16, &e->start);
errors |= qemu_strtoul(end + 1, NULL, 16, &e->end);
for (i = 0; i < nlines; i++) {
gchar **fields = g_strsplit(lines[i], " ", 6);
guint nfields = g_strv_length(fields);
if (nfields > 4) {
uint64_t start, end, offset, inode;
int errors = 0;
const char *p;
errors |= qemu_strtou64(fields[0], &p, 16, &start);
errors |= qemu_strtou64(p + 1, NULL, 16, &end);
errors |= qemu_strtou64(fields[2], NULL, 16, &offset);
errors |= qemu_strtou64(fields[4], NULL, 10, &inode);
if (!errors) {
size_t dev_len, path_len;
MapInfo *e;
dev_len = strlen(fields[3]) + 1;
if (nfields == 6) {
p = fields[5];
p += strspn(p, " ");
path_len = strlen(p) + 1;
} else {
p = NULL;
path_len = 0;
}
e = g_malloc0(sizeof(*e) + dev_len + path_len);
e->itree.start = start;
e->itree.last = end - 1;
e->offset = offset;
e->inode = inode;
e->is_read = fields[1][0] == 'r';
e->is_write = fields[1][1] == 'w';
e->is_exec = fields[1][2] == 'x';
e->is_priv = fields[1][3] == 'p';
errors |= qemu_strtoul(fields[2], NULL, 16, &e->offset);
e->dev = g_strdup(fields[3]);
errors |= qemu_strtou64(fields[4], NULL, 10, &e->inode);
if (!errors) {
/*
* The last field may have leading spaces which we
* need to strip.
*/
if (g_strv_length(fields) == 6) {
e->path = g_strdup(g_strchug(fields[5]));
}
map_info = g_slist_prepend(map_info, e);
} else {
g_free(e->dev);
g_free(e);
memcpy(e->dev, fields[3], dev_len);
if (path_len) {
e->path = memcpy(e->dev + dev_len, p, path_len);
}
interval_tree_insert(&e->itree, root);
}
g_strfreev(fields);
}
g_strfreev(lines);
g_free(maps);
g_strfreev(fields);
}
g_strfreev(lines);
g_free(maps);
/* ensure the map data is in the same order we collected it */
return g_slist_reverse(map_info);
return root;
}
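
For reference, a worked example of the parsing above on a typical (illustrative) /proc/self/maps line:

    7f2d4a400000-7f2d4a5b0000 r-xp 00000000 fd:01 1835043   /usr/lib/libc.so.6

g_strsplit on ' ' with a limit of 6 yields fields[0] = "7f2d4a400000-7f2d4a5b0000", fields[1] = "r-xp", fields[2] = "00000000", fields[3] = "fd:01", fields[4] = "1835043", and fields[5] = the remainder of the line, leading spaces included. The loop then fills one MapInfo with itree.start = 0x7f2d4a400000, itree.last = 0x7f2d4a5affff, is_read/is_exec/is_priv true, is_write false, offset = 0, inode = 1835043, dev = "fd:01", and path = "/usr/lib/libc.so.6" once strspn() has skipped the leading spaces of fields[5].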
/**
* free_self_maps:
* @info: a GSlist
* @root: an interval tree
*
* Free a list of MapInfo structures.
* Free a tree of MapInfo structures.
* Since we allocated each MapInfo in one chunk, we need not consider the
* contents and can simply free each RBNode.
*/
static void free_info(gpointer data)
static void free_rbnode(RBNode *n)
{
MapInfo *e = (MapInfo *) data;
g_free(e->dev);
g_free(e->path);
g_free(e);
if (n) {
free_rbnode(n->rb_left);
free_rbnode(n->rb_right);
g_free(n);
}
}
void free_self_maps(GSList *info)
void free_self_maps(IntervalTreeRoot *root)
{
g_slist_free_full(info, &free_info);
if (root) {
free_rbnode(root->rb_root.rb_node);
g_free(root);
}
}