diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index c2c7fe5c47..0a54bf7be8 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -195,6 +195,9 @@ extern "C" { #ifndef MAP_FIXED_NOREPLACE #define MAP_FIXED_NOREPLACE 0 #endif +#ifndef MAP_NORESERVE +#define MAP_NORESERVE 0 +#endif #ifndef ENOMEDIUM #define ENOMEDIUM ENODEV #endif diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 11ea8e19a6..9b171c9dbe 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -2251,6 +2251,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) flags = MAP_FIXED; flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE; + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; if (block->fd >= 0) { area = mmap(vaddr, length, PROT_READ | PROT_WRITE, flags, block->fd, offset); diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index d0cf4aaee5..838e286ce5 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/mmap-alloc.h" #include "qemu/host-utils.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #define HUGETLBFS_MAGIC 0x958458f6 @@ -83,6 +84,70 @@ size_t qemu_mempath_getpagesize(const char *mem_path) return qemu_real_host_page_size; } +#define OVERCOMMIT_MEMORY_PATH "/proc/sys/vm/overcommit_memory" +static bool map_noreserve_effective(int fd, uint32_t qemu_map_flags) +{ +#if defined(__linux__) + const bool readonly = qemu_map_flags & QEMU_MAP_READONLY; + const bool shared = qemu_map_flags & QEMU_MAP_SHARED; + gchar *content = NULL; + const char *endptr; + unsigned int tmp; + + /* + * hugeltb accounting is different than ordinary swap reservation: + * a) Hugetlb pages from the pool are reserved for both private and + * shared mappings. For shared mappings, all mappers have to specify + * MAP_NORESERVE. + * b) MAP_NORESERVE is not affected by /proc/sys/vm/overcommit_memory. + */ + if (qemu_fd_getpagesize(fd) != qemu_real_host_page_size) { + return true; + } + + /* + * Accountable mappings in the kernel that can be affected by MAP_NORESEVE + * are private writable mappings (see mm/mmap.c:accountable_mapping() in + * Linux). For all shared or readonly mappings, MAP_NORESERVE is always + * implicitly active -- no reservation; this includes shmem. The only + * exception is shared anonymous memory, it is accounted like private + * anonymous memory. + */ + if (readonly || (shared && fd >= 0)) { + return true; + } + + /* + * MAP_NORESERVE is globally ignored for applicable !hugetlb mappings when + * memory overcommit is set to "never". Sparse memory regions aren't really + * possible in this system configuration. + * + * Bail out now instead of silently committing way more memory than + * currently desired by the user. + */ + if (g_file_get_contents(OVERCOMMIT_MEMORY_PATH, &content, NULL, NULL) && + !qemu_strtoui(content, &endptr, 0, &tmp) && + (!endptr || *endptr == '\n')) { + if (tmp == 2) { + error_report("Skipping reservation of swap space is not supported:" + " \"" OVERCOMMIT_MEMORY_PATH "\" is \"2\""); + return false; + } + return true; + } + /* this interface has been around since Linux 2.6 */ + error_report("Skipping reservation of swap space is not supported:" + " Could not read: \"" OVERCOMMIT_MEMORY_PATH "\""); + return false; +#endif + /* + * E.g., FreeBSD used to define MAP_NORESERVE, never implemented it, + * and removed it a while ago. + */ + error_report("Skipping reservation of swap space is not supported"); + return false; +} + /* * Reserve a new memory region of the requested size to be used for mapping * from the given fd (if any). @@ -131,13 +196,13 @@ static void *mmap_activate(void *ptr, size_t size, int fd, int flags = MAP_FIXED; void *activated_ptr; - if (noreserve) { - error_report("Skipping reservation of swap space is not supported"); + if (noreserve && !map_noreserve_effective(fd, qemu_map_flags)) { return MAP_FAILED; } flags |= fd == -1 ? MAP_ANONYMOUS : 0; flags |= shared ? MAP_SHARED : MAP_PRIVATE; + flags |= noreserve ? MAP_NORESERVE : 0; if (shared && sync) { map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE; }