qemu-e2k/include/qemu/osdep.h

775 lines
23 KiB
C
Raw Normal View History

/*
* OS includes and handling of OS dependencies
*
* This header exists to pull in some common system headers that
* most code in QEMU will want, and to fix up some possible issues with
* it (missing defines, Windows weirdness, and so on).
*
* To avoid getting into possible circular include dependencies, this
* file should not include any other QEMU headers, with the exceptions
* of config-host.h, config-target.h, qemu/compiler.h,
* sysemu/os-posix.h, sysemu/os-win32.h, glib-compat.h and
* qemu/typedefs.h, all of which are doing a similar job to this file
* and are under similar constraints.
*
* This header also contains prototypes for functions defined in
* os-*.c and util/oslib-*.c; those would probably be better split
* out into separate header files.
*
* In an ideal world this header would contain only:
* (1) things which everybody needs
* (2) things without which code would work on most platforms but
* fail to compile or misbehave on a minority of host OSes
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_OSDEP_H
#define QEMU_OSDEP_H
#include "config-host.h"
#ifdef NEED_CPU_H
#include CONFIG_TARGET
#else
#include "exec/poison.h"
#endif
#include "qemu/compiler.h"
/*
 * Older C++ toolchains only expose certain macros from the system
 * headers when the three request macros below are already defined at
 * the point where the header is first included, so define any that the
 * build has not provided yet.
 */
#if !defined(__STDC_CONSTANT_MACROS)
# define __STDC_CONSTANT_MACROS
#endif
#if !defined(__STDC_LIMIT_MACROS)
# define __STDC_LIMIT_MACROS
#endif
#if !defined(__STDC_FORMAT_MACROS)
# define __STDC_FORMAT_MACROS
#endif
/*
 * On OSX, <stdlib.h> attaches a warning to its declaration of daemon().
 * To keep the compiler from seeing that declaration, daemon is renamed
 * to a throw-away identifier while <stdlib.h> is processed; the rename
 * is then dropped and daemon() is declared by hand so that callers
 * still get a prototype.
 *
 * The statement order below (define, include, undef, declare) is
 * essential and must not be changed.
 */
#ifdef __APPLE__
#define daemon qemu_fake_daemon_function
#include <stdlib.h>
#undef daemon
/* QEMU_EXTERN_C is not defined in this file; presumably it comes from
 * qemu/compiler.h, which is included above -- TODO confirm. */
QEMU_EXTERN_C int daemon(int, int);
#endif
#if defined(_WIN32)
/* Target Windows API level, using the numbering from sdkddkver.h. */
# if !defined(_WIN32_WINNT)
#  define _WIN32_WINNT 0x0600 /* Vista */
# endif
/* Cuts down on the number of headers that get included implicitly. */
# if !defined(WIN32_LEAN_AND_MEAN)
#  define WIN32_LEAN_AND_MEAN
# endif
#endif
/*
 * Ask MinGW for C99/POSIX conforming format strings; this needs
 * mingw32-runtime 3.15 or later.
 */
#if defined(__MINGW32__)
# define __USE_MINGW_ANSI_STDIO 1
#endif
#include <stdarg.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/types.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <inttypes.h>
#include <limits.h>
oslib-win32: only provide localtime_r/gmtime_r if missing The oslib-win32 file currently provides a localtime_r and gmtime_r replacement unconditionally. Some versions of Mingw-w64 would provide crude macros for localtime_r/gmtime_r which QEMU takes care to disable. Latest versions of Mingw-w64 now provide actual functions for localtime_r/gmtime_r, but with a twist that you have to include unistd.h or pthread.h before including time.h. By luck some files in QEMU have such an include order, resulting in compile errors: CC util/osdep.o In file included from include/qemu-common.h:48:0, from util/osdep.c:48: include/sysemu/os-win32.h:77:12: error: redundant redeclaration of 'gmtime_r' [-Werror=redundant-decls] struct tm *gmtime_r(const time_t *timep, struct tm *result); ^ In file included from include/qemu-common.h:35:0, from util/osdep.c:48: /usr/i686-w64-mingw32/sys-root/mingw/include/time.h:272:107: note: previous definition of 'gmtime_r' was here In file included from include/qemu-common.h:48:0, from util/osdep.c:48: include/sysemu/os-win32.h:79:12: error: redundant redeclaration of 'localtime_r' [-Werror=redundant-decls] struct tm *localtime_r(const time_t *timep, struct tm *result); ^ In file included from include/qemu-common.h:35:0, from util/osdep.c:48: /usr/i686-w64-mingw32/sys-root/mingw/include/time.h:269:107: note: previous definition of 'localtime_r' was here This change adds a configure test to see if localtime_r exits, and only enables the QEMU impl if missing. We also re-arrange qemu-common.h try attempt to guarantee that all source files get unistd.h before time.h and thus see the localtime_r/gmtime_r defs. [sw: Use "official" spellings for Mingw-w64, MinGW in comments.] [sw: Terminate sentences with a dot in comments.] Signed-off-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Denis V. Lunev <den@openvz.org> Signed-off-by: Stefan Weil <sw@weilnetz.de>
2015-09-22 16:13:26 +02:00
/* Put unistd.h before time.h as that triggers localtime_r/gmtime_r
* function availability on recentish Mingw-w64 platforms. */
#include <unistd.h>
#include <time.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
qemu-io: Add generic function for reinitializing optind. On FreeBSD 11.2: $ nbdkit memory size=1M --run './qemu-io -f raw -c "aio_write 0 512" $nbd' Parsing error: non-numeric argument, or extraneous/unrecognized suffix -- aio_write After main option parsing, we reinitialize optind so we can parse each command. However reinitializing optind to 0 does not work on FreeBSD. What happens when you do this is optind remains 0 after the option parsing loop, and the result is we try to parse argv[optind] == argv[0] == "aio_write" as if it was the first parameter. The FreeBSD manual page says: In order to use getopt() to evaluate multiple sets of arguments, or to evaluate a single set of arguments multiple times, the variable optreset must be set to 1 before the second and each additional set of calls to getopt(), and the variable optind must be reinitialized. (From the rest of the man page it is clear that optind must be reinitialized to 1). The glibc man page says: A program that scans multiple argument vectors, or rescans the same vector more than once, and wants to make use of GNU extensions such as '+' and '-' at the start of optstring, or changes the value of POSIXLY_CORRECT between scans, must reinitialize getopt() by resetting optind to 0, rather than the traditional value of 1. (Resetting to 0 forces the invocation of an internal initialization routine that rechecks POSIXLY_CORRECT and checks for GNU extensions in optstring.) This commit introduces an OS-portability function called qemu_reset_optind which provides a way of resetting optind that works on FreeBSD and platforms that use optreset, while keeping it the same as now on other platforms. Note that the qemu codebase sets optind in many other places, but in those other places it's setting a local variable and not using getopt. This change is only needed in places where we are using getopt and the associated global variable optind. Signed-off-by: Richard W.M. 
Jones <rjones@redhat.com> Message-id: 20190118101114.11759-2-rjones@redhat.com Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-01-18 11:11:14 +01:00
#include <getopt.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <assert.h>
/* setjmp must be declared before sysemu/os-win32.h
* because it is redefined there. */
#include <setjmp.h>
#include <signal.h>
#ifdef CONFIG_IOVEC
#include <sys/uio.h>
#endif
#if defined(__linux__) && defined(__sparc__)
/* The SPARC definition of QEMU_VMALLOC_ALIGN needs SHMLBA */
#include <sys/shm.h>
#endif
#ifdef _WIN32
/*
 * <sys/wait.h> is not included for Windows builds, so supply stand-ins:
 * every status counts as a normal exit and the status value itself is
 * taken as the exit code.
 */
# define WIFEXITED(st) 1
# define WEXITSTATUS(st) (st)
#else
# include <sys/wait.h>
#endif
#ifdef __APPLE__
#include <AvailabilityMacros.h>
#endif
/*
* This is somewhat like a system header; it must be outside any extern "C"
* block because it includes system headers itself, including glib.h,
* which will not compile if inside an extern "C" block.
*/
#include "glib-compat.h"
#ifdef _WIN32
#include "sysemu/os-win32.h"
#endif
#ifdef CONFIG_POSIX
#include "sysemu/os-posix.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
2016-03-14 09:01:28 +01:00
#include "qemu/typedefs.h"
/*
 * As of mingw v6.0.0 the function behind the assert macro is not marked
 * noreturn, which stops the compiler from discarding the code that
 * follows an assert(false). The code base depends on that elimination
 * to drop code that is unreachable when features are disabled, so on
 * MinGW assert() is routed to g_assert() instead; every supported
 * version of Glib's g_assert() meets the requirement.
 */
#if defined(__MINGW32__)
# undef assert
# define assert(x) g_assert(x)
#endif
/*
 * The waitpid man page notes that WCOREDUMP is not specified in
 * POSIX.1-2001 and is missing on some UNIX implementations (e.g., AIX,
 * SunOS), and recommends wrapping its use in #ifdef WCOREDUMP ... #endif.
 * Instead of doing that at every call site, provide a fallback that
 * always reports "no core dump".
 */
#if !defined(WCOREDUMP)
# define WCOREDUMP(st) 0
#endif
osdep.h: Prohibit disabling assert() in supported builds We already have several files that knowingly require assert() to work, sometimes because refactoring the code for proper error handling has not been tackled yet; there are probably other files that have a similar situation but with no comments documenting the same. In fact, we have places in migration that handle untrusted input with assertions, where disabling the assertions risks a worse security hole than the current behavior of losing the guest to SIGABRT when migration fails because of the assertion. Promote our current per-file safety-valve to instead be project-wide, and expand it to also cover glib's g_assert(). Note that we do NOT want to encourage 'assert(side-effects);' (that is a bad practice that prevents copy-and-paste of code to other projects that CAN disable assertions; plus it costs unnecessary reviewer mental cycles to remember whether a project special-cases the crippling of asserts); and we would LIKE to fix migration to not rely on asserts (but that takes a big code audit). But in the meantime, we DO want to send a message that anyone that disables assertions has to tweak code in order to compile, making it obvious that they are taking on additional risk that we are not going to support. At the same time, leave comments mentioning NDEBUG in files that we know still need to be scrubbed, so there is at least something to grep for. It would be possible to come up with some other mechanism for doing runtime checking by default, but which does not abort the program on failure, while leaving side effects in place (unlike how crippling assert() avoids even the side effects), perhaps under the name q_verify(); but it was not deemed worth the effort (developers should not have to learn a replacement when the standard C macro works just fine, and it would be a lot of churn for little gain). 
The patch specifically uses #error rather than #warn so that a user is forced to tweak the header to acknowledge the issue, even when not using a -Werror compilation. Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Reviewed-by: Thomas Huth <thuth@redhat.com> Message-Id: <20170911211320.25385-1-eblake@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-09-11 23:13:20 +02:00
/*
 * A lot of the code base has not been audited for running without
 * assertions: with them compiled out it may fail in strange ways, or
 * even become a security risk during migration. Builds that disable
 * assertions are therefore rejected outright. You may comment out the
 * checks below if you absolutely want to avoid the assertion overhead,
 * but that configuration is not supported upstream and the risk is all
 * yours. Patches that remove side-effects from assertions, or that
 * replace assert with proper Error-based handling, are welcome.
 */
#if defined(NDEBUG)
#error building with NDEBUG is not supported
#endif
#if defined(G_DISABLE_ASSERT)
#error building with G_DISABLE_ASSERT is not supported
#endif
/*
 * Fallbacks for open()/mmap() flags that the host headers may not
 * provide. A flag defined as 0 contributes nothing when OR-ed into a
 * flag word; MAP_ANONYMOUS falls back to the MAP_ANON spelling.
 */
#if !defined(O_LARGEFILE)
# define O_LARGEFILE 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif
#if !defined(MAP_ANONYMOUS)
# define MAP_ANONYMOUS MAP_ANON
#endif
#if !defined(MAP_FIXED_NOREPLACE)
# define MAP_FIXED_NOREPLACE 0
#endif
util/mmap-alloc: Support RAM_NORESERVE via MAP_NORESERVE under Linux Let's support RAM_NORESERVE via MAP_NORESERVE on Linux. The flag has no effect on most shared mappings - except for hugetlbfs and anonymous memory. Linux man page: "MAP_NORESERVE: Do not reserve swap space for this mapping. When swap space is reserved, one has the guarantee that it is possible to modify the mapping. When swap space is not reserved one might get SIGSEGV upon a write if no physical memory is available. See also the discussion of the file /proc/sys/vm/overcommit_memory in proc(5). In kernels before 2.6, this flag had effect only for private writable mappings." Note that the "guarantee" part is wrong with memory overcommit in Linux. Also, in Linux hugetlbfs is treated differently - we configure reservation of huge pages from the pool, not reservation of swap space (huge pages cannot be swapped). The rough behavior is [1]: a) !Hugetlbfs: 1) Without MAP_NORESERVE *or* with memory overcommit under Linux disabled ("/proc/sys/vm/overcommit_memory == 2"), the following accounting/reservation happens: For a file backed map SHARED or READ-only - 0 cost (the file is the map not swap) PRIVATE WRITABLE - size of mapping per instance For an anonymous or /dev/zero map SHARED - size of mapping PRIVATE READ-only - 0 cost (but of little use) PRIVATE WRITABLE - size of mapping per instance 2) With MAP_NORESERVE, no accounting/reservation happens. b) Hugetlbfs: 1) Without MAP_NORESERVE, huge pages are reserved. 2) With MAP_NORESERVE, no huge pages are reserved. Note: With "/proc/sys/vm/overcommit_memory == 0", we were already able to configure it for !hugetlbfs globally; this toggle now allows configuring it more fine-grained, not for the whole system. The target use case is virtio-mem, which dynamically exposes memory inside a large, sparse memory area to the VM. 
[1] https://www.kernel.org/doc/Documentation/vm/overcommit-accounting Reviewed-by: Peter Xu <peterx@redhat.com> Acked-by: Eduardo Habkost <ehabkost@redhat.com> for memory backend and machine core Signed-off-by: David Hildenbrand <david@redhat.com> Message-Id: <20210510114328.21835-10-david@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-05-10 13:43:22 +02:00
/* Hosts without MAP_NORESERVE get a flag value that adds no bits. */
#if !defined(MAP_NORESERVE)
# define MAP_NORESERVE 0
#endif
/*
 * Error numbers that some hosts lack. ENOMEDIUM is mapped onto ENODEV;
 * the remaining ones receive values assigned by this header.
 */
#if !defined(ENOMEDIUM)
# define ENOMEDIUM ENODEV
#endif
#ifndef ENOTSUP
# define ENOTSUP 4096
#endif
#ifndef ECANCELED
# define ECANCELED 4097
#endif
#ifndef EMEDIUMTYPE
# define EMEDIUMTYPE 4098
#endif
#ifndef ESHUTDOWN
# define ESHUTDOWN 4099
#endif
/*
 * The width of time_t (32 or 64 bits) and its signedness both depend on
 * the host OS, so its maximum cannot be hardcoded and is computed from
 * the type instead. The result is not a C preprocessor constant, which
 * means TIME_MAX cannot appear in an #ifdef or #if; that is not a
 * problem for our purposes.
 */
/*
 * TYPE_SIGNED, TYPE_WIDTH and TYPE_MAXIMUM come from Gnulib and are
 * under the LGPL v2.1 or (at your option) any later version.
 */
/* True if the real type T is signed. */
#define TYPE_SIGNED(t) (!((t)-1 > (t)0))
/*
 * Width in bits of the integer type or expression T.
 * Padding bits are not supported.
 */
#define TYPE_WIDTH(t) (CHAR_BIT * sizeof(t))
/*
 * Maximum value of the integer type T. For signed types the value is
 * assembled from the second-highest value bit downwards so that no
 * intermediate result overflows.
 */
#define TYPE_MAXIMUM(t)                                         \
    ((t) (TYPE_SIGNED(t)                                        \
          ? ((((t)1 << (TYPE_WIDTH(t) - 2)) - 1) * 2 + 1)       \
          : (t)-1))
#if !defined(TIME_MAX)
# define TIME_MAX TYPE_MAXIMUM(time_t)
#endif
/*
 * HOST_LONG_BITS is the size of a native pointer in bits, derived from
 * the range of uintptr_t. Hosts that are neither 32-bit nor 64-bit are
 * rejected at compile time.
 */
#if UINTPTR_MAX == UINT64_MAX
# define HOST_LONG_BITS 64
#elif UINTPTR_MAX == UINT32_MAX
# define HOST_LONG_BITS 32
#else
# error Unknown pointer size
#endif
/*
 * <stdint.h> on Mac OSX has a bug that gives SIZE_MAX the wrong type.
 * When the build flags that problem, substitute our own definition; it
 * cannot be used in preprocessor expressions, but it is sufficient for
 * our needs.
 */
#ifdef HAVE_BROKEN_SIZE_MAX
# if HAVE_BROKEN_SIZE_MAX
#  undef SIZE_MAX
#  define SIZE_MAX ((size_t)-1)
# endif
#endif
osdep: Make MIN/MAX evaluate arguments only once I'm not aware of any immediate bugs in qemu where a second runtime evaluation of the arguments to MIN() or MAX() causes a problem, but proactively preventing such abuse is easier than falling prey to an unintended case down the road. At any rate, here's the conversation that sparked the current patch: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg05718.html Update the MIN/MAX macros to only evaluate their argument once at runtime; this uses typeof(1 ? (a) : (b)) to ensure that we are promoting the temporaries to the same type as the final comparison (we have to trigger type promotion, as typeof(bitfield) won't compile; and we can't use typeof((a) + (b)) or even typeof((a) + 0), as some of our uses of MAX are on void* pointers where such addition is undefined). However, we are unable to work around gcc refusing to compile ({}) in a constant context (such as the array length of a static variable), even when only used in the dead branch of a __builtin_choose_expr(), so we have to provide a second macro pair MIN_CONST and MAX_CONST for use when both arguments are known to be compile-time constants and where the result must also be usable as a constant; this second form evaluates arguments multiple times but that doesn't matter for constants. By using a void expression as the expansion if a non-constant is presented to this second form, we can enlist the compiler to ensure the double evaluation is not attempted on non-constants. Alas, as both macros now rely on compiler intrinsics, they are no longer usable in preprocessor #if conditions; those will just have to be open-coded or the logic rewritten into #define or runtime 'if' conditions (but where the compiler dead-code-elimination will probably still apply). I tested that both gcc 10.1.1 and clang 10.0.0 produce errors for all forms of macro mis-use. 
As the errors can sometimes be cryptic, I'm demonstrating the gcc output: Use of MIN when MIN_CONST is needed: In file included from /home/eblake/qemu/qemu-img.c:25: /home/eblake/qemu/include/qemu/osdep.h:249:5: error: braced-group within expression allowed only inside a function 249 | ({ \ | ^ /home/eblake/qemu/qemu-img.c:92:12: note: in expansion of macro ‘MIN’ 92 | char array[MIN(1, 2)] = ""; | ^~~ Use of MIN_CONST when MIN is needed: /home/eblake/qemu/qemu-img.c: In function ‘is_allocated_sectors’: /home/eblake/qemu/qemu-img.c:1225:15: error: void value not ignored as it ought to be 1225 | i = MIN_CONST(i, n); | ^ Use of MIN in the preprocessor: In file included from /home/eblake/qemu/accel/tcg/translate-all.c:20: /home/eblake/qemu/accel/tcg/translate-all.c: In function ‘page_check_range’: /home/eblake/qemu/include/qemu/osdep.h:249:6: error: token "{" is not valid in preprocessor expressions 249 | ({ \ | ^ Fix the resulting callsites that used #if or computed a compile-time constant min or max to use the new macros. cpu-defs.h is interesting, as CPU_TLB_DYN_MAX_BITS is sometimes used as a constant and sometimes dynamic. It may be worth improving glib's MIN/MAX definitions to be saner, but that is a task for another day. Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Message-Id: <20200625162602.700741-1-eblake@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-06-25 18:26:02 +02:00
/*
 * Two variations of MIN/MAX macros. The first is for runtime use, and
 * evaluates arguments only once (so it is safe even with side
 * effects), but will not work in constant contexts (such as array
 * size declarations) because of the '{}'. The second is for constant
 * expression use, where evaluating arguments twice is safe because
 * the result is going to be constant anyway, but will not work in a
 * runtime context because of a void expression where a value is
 * expected. Thus, both gcc and clang will fail to compile if you use
 * the wrong macro (even if the error may seem a bit cryptic).
 *
 * Note that neither form is usable as an #if condition; if you truly
 * need to write conditional code that depends on a minimum or maximum
 * determined by the pre-processor instead of the compiler, you'll
 * have to open-code it. Sadly, Coverity is severely confused by the
 * constant variants, so we have to dumb things down there.
 *
 * Implementation notes for the runtime forms:
 * - The temporaries are typed as (1 ? (a) : (b)), i.e. the common type
 *   both arguments are converted to for the comparison.
 * - The type is spelled __typeof__ rather than typeof so that the
 *   macros also compile when the including translation unit is built
 *   in a strict ISO mode (e.g. -std=c11), where typeof is not a keyword.
 * - The temporaries are named _a and _b, so an argument expression
 *   must not itself refer to identifiers with those names.
 */
#undef MIN
#define MIN(a, b)                                       \
    ({                                                  \
        __typeof__(1 ? (a) : (b)) _a = (a), _b = (b);   \
        _a < _b ? _a : _b;                              \
    })
#undef MAX
#define MAX(a, b)                                       \
    ({                                                  \
        __typeof__(1 ? (a) : (b)) _a = (a), _b = (b);   \
        _a > _b ? _a : _b;                              \
    })
/*
 * Constant-expression variants of MIN/MAX. When both arguments are
 * compile-time constants the macro yields the usual conditional
 * expression (arguments are evaluated more than once, which is harmless
 * for constants). Otherwise it yields a void expression, so using it
 * where a value is expected fails to compile.
 *
 * Under Coverity the plain conditional form is used unconditionally.
 */
#ifdef __COVERITY__
# define MIN_CONST(a, b) ((a) < (b) ? (a) : (b))
# define MAX_CONST(a, b) ((a) > (b) ? (a) : (b))
#else
# define MIN_CONST(a, b)                                        \
    __builtin_choose_expr(                                      \
        __builtin_constant_p(a) && __builtin_constant_p(b),     \
        (a) < (b) ? (a) : (b),                                  \
        ((void)0))
# define MAX_CONST(a, b)                                        \
    __builtin_choose_expr(                                      \
        __builtin_constant_p(a) && __builtin_constant_p(b),     \
        (a) > (b) ? (a) : (b),                                  \
        ((void)0))
#endif
osdep: Make MIN/MAX evaluate arguments only once I'm not aware of any immediate bugs in qemu where a second runtime evaluation of the arguments to MIN() or MAX() causes a problem, but proactively preventing such abuse is easier than falling prey to an unintended case down the road. At any rate, here's the conversation that sparked the current patch: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg05718.html Update the MIN/MAX macros to only evaluate their argument once at runtime; this uses typeof(1 ? (a) : (b)) to ensure that we are promoting the temporaries to the same type as the final comparison (we have to trigger type promotion, as typeof(bitfield) won't compile; and we can't use typeof((a) + (b)) or even typeof((a) + 0), as some of our uses of MAX are on void* pointers where such addition is undefined). However, we are unable to work around gcc refusing to compile ({}) in a constant context (such as the array length of a static variable), even when only used in the dead branch of a __builtin_choose_expr(), so we have to provide a second macro pair MIN_CONST and MAX_CONST for use when both arguments are known to be compile-time constants and where the result must also be usable as a constant; this second form evaluates arguments multiple times but that doesn't matter for constants. By using a void expression as the expansion if a non-constant is presented to this second form, we can enlist the compiler to ensure the double evaluation is not attempted on non-constants. Alas, as both macros now rely on compiler intrinsics, they are no longer usable in preprocessor #if conditions; those will just have to be open-coded or the logic rewritten into #define or runtime 'if' conditions (but where the compiler dead-code-elimination will probably still apply). I tested that both gcc 10.1.1 and clang 10.0.0 produce errors for all forms of macro mis-use. 
As the errors can sometimes be cryptic, I'm demonstrating the gcc output: Use of MIN when MIN_CONST is needed: In file included from /home/eblake/qemu/qemu-img.c:25: /home/eblake/qemu/include/qemu/osdep.h:249:5: error: braced-group within expression allowed only inside a function 249 | ({ \ | ^ /home/eblake/qemu/qemu-img.c:92:12: note: in expansion of macro ‘MIN’ 92 | char array[MIN(1, 2)] = ""; | ^~~ Use of MIN_CONST when MIN is needed: /home/eblake/qemu/qemu-img.c: In function ‘is_allocated_sectors’: /home/eblake/qemu/qemu-img.c:1225:15: error: void value not ignored as it ought to be 1225 | i = MIN_CONST(i, n); | ^ Use of MIN in the preprocessor: In file included from /home/eblake/qemu/accel/tcg/translate-all.c:20: /home/eblake/qemu/accel/tcg/translate-all.c: In function ‘page_check_range’: /home/eblake/qemu/include/qemu/osdep.h:249:6: error: token "{" is not valid in preprocessor expressions 249 | ({ \ | ^ Fix the resulting callsites that used #if or computed a compile-time constant min or max to use the new macros. cpu-defs.h is interesting, as CPU_TLB_DYN_MAX_BITS is sometimes used as a constant and sometimes dynamic. It may be worth improving glib's MIN/MAX definitions to be saner, but that is a task for another day. Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Message-Id: <20200625162602.700741-1-eblake@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-06-25 18:26:02 +02:00
/*
* Minimum function that returns zero only if both values are zero.
* Intended for use with unsigned values only.
*/
#ifndef MIN_NON_ZERO
osdep: Make MIN/MAX evaluate arguments only once I'm not aware of any immediate bugs in qemu where a second runtime evaluation of the arguments to MIN() or MAX() causes a problem, but proactively preventing such abuse is easier than falling prey to an unintended case down the road. At any rate, here's the conversation that sparked the current patch: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg05718.html Update the MIN/MAX macros to only evaluate their argument once at runtime; this uses typeof(1 ? (a) : (b)) to ensure that we are promoting the temporaries to the same type as the final comparison (we have to trigger type promotion, as typeof(bitfield) won't compile; and we can't use typeof((a) + (b)) or even typeof((a) + 0), as some of our uses of MAX are on void* pointers where such addition is undefined). However, we are unable to work around gcc refusing to compile ({}) in a constant context (such as the array length of a static variable), even when only used in the dead branch of a __builtin_choose_expr(), so we have to provide a second macro pair MIN_CONST and MAX_CONST for use when both arguments are known to be compile-time constants and where the result must also be usable as a constant; this second form evaluates arguments multiple times but that doesn't matter for constants. By using a void expression as the expansion if a non-constant is presented to this second form, we can enlist the compiler to ensure the double evaluation is not attempted on non-constants. Alas, as both macros now rely on compiler intrinsics, they are no longer usable in preprocessor #if conditions; those will just have to be open-coded or the logic rewritten into #define or runtime 'if' conditions (but where the compiler dead-code-elimination will probably still apply). I tested that both gcc 10.1.1 and clang 10.0.0 produce errors for all forms of macro mis-use. 
As the errors can sometimes be cryptic, I'm demonstrating the gcc output: Use of MIN when MIN_CONST is needed: In file included from /home/eblake/qemu/qemu-img.c:25: /home/eblake/qemu/include/qemu/osdep.h:249:5: error: braced-group within expression allowed only inside a function 249 | ({ \ | ^ /home/eblake/qemu/qemu-img.c:92:12: note: in expansion of macro ‘MIN’ 92 | char array[MIN(1, 2)] = ""; | ^~~ Use of MIN_CONST when MIN is needed: /home/eblake/qemu/qemu-img.c: In function ‘is_allocated_sectors’: /home/eblake/qemu/qemu-img.c:1225:15: error: void value not ignored as it ought to be 1225 | i = MIN_CONST(i, n); | ^ Use of MIN in the preprocessor: In file included from /home/eblake/qemu/accel/tcg/translate-all.c:20: /home/eblake/qemu/accel/tcg/translate-all.c: In function ‘page_check_range’: /home/eblake/qemu/include/qemu/osdep.h:249:6: error: token "{" is not valid in preprocessor expressions 249 | ({ \ | ^ Fix the resulting callsites that used #if or computed a compile-time constant min or max to use the new macros. cpu-defs.h is interesting, as CPU_TLB_DYN_MAX_BITS is sometimes used as a constant and sometimes dynamic. It may be worth improving glib's MIN/MAX definitions to be saner, but that is a task for another day. Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Message-Id: <20200625162602.700741-1-eblake@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-06-25 18:26:02 +02:00
/*
 * MIN_NON_ZERO(a, b): the smaller of the two arguments, ignoring an
 * argument that is zero; the result is zero only when both are zero.
 *
 * Each argument is evaluated exactly once.  The temporaries take the
 * promoted common type of (a) and (b) via "1 ? (a) : (b)".
 *
 * __typeof__ is spelled with underscores because plain "typeof" is not a
 * keyword when the including file is built in a strict ISO mode.
 */
#define MIN_NON_ZERO(a, b)                                      \
    ({                                                          \
        __typeof__(1 ? (a) : (b)) _a = (a), _b = (b);           \
        _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b;          \
    })
#endif
/*
 * Round n down to a multiple of m.  m may be any non-zero value, not just
 * a power of 2.  m is evaluated twice.
 */
#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
/*
 * Round n up to a multiple of m.  Safe when m is not a power of 2 (see
 * ROUND_UP for a faster version when a power of 2 is guaranteed).
 * m is evaluated more than once.
 */
#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
/* Non-zero if n is a multiple of m, zero otherwise. */
#define QEMU_IS_ALIGNED(n, m) (((n) % (m)) == 0)
/*
 * Align pointer p down to an n-byte boundary.  The arithmetic is done on
 * uintptr_t and the result is cast back to the type of p.
 * __typeof__ (not typeof) keeps the macro usable from translation units
 * built in a strict ISO mode, where "typeof" is not a keyword.
 */
#define QEMU_ALIGN_PTR_DOWN(p, n) \
    ((__typeof__(p))QEMU_ALIGN_DOWN((uintptr_t)(p), (n)))
/* Align pointer p up to an n-byte boundary; see QEMU_ALIGN_PTR_DOWN. */
#define QEMU_ALIGN_PTR_UP(p, n) \
    ((__typeof__(p))QEMU_ALIGN_UP((uintptr_t)(p), (n)))
/* Non-zero if pointer p sits on an n-byte boundary. */
#define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n))
/*
 * Round n up to a multiple of d.  Requires that d be a power of 2 (see
 * QEMU_ALIGN_UP for a safer but slower version on arbitrary numbers).
 *
 * The mask is built as -(0 ? (n) : (d)): the conditional is never taken,
 * so n is not evaluated there, but it makes d take the common type of n
 * and d before it is negated.  That is why this works even if d is a
 * smaller type than n.  d is evaluated twice.
 */
#ifndef ROUND_UP
#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d)))
#endif
/*
 * Divide n by d, rounding the quotient up instead of truncating.
 * d is evaluated twice.  NOTE(review): the "+ (d) - 1" form presumably
 * assumes non-negative operands; confirm against callers.
 */
#ifndef DIV_ROUND_UP
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif
/*
 * Non-zero if x is an array, zero if it is a pointer.
 *
 * &(x)[0] is always a pointer: if it has the same type as x then the
 * argument is a pointer, not an array.
 *
 * __typeof__ (not typeof) keeps the macro usable from translation units
 * built in a strict ISO mode, where "typeof" is not a keyword.
 */
#define QEMU_IS_ARRAY(x) (!__builtin_types_compatible_p(__typeof__(x), \
                                                        __typeof__(&(x)[0])))
/*
 * Number of elements in array x (total size divided by the size of the
 * first element).
 *
 * The second term adds QEMU_BUILD_BUG_ON_ZERO(!QEMU_IS_ARRAY(x)).
 * NOTE(review): that macro is defined outside this chunk; presumably it
 * evaluates to zero and breaks the build when x is a pointer rather than
 * an array - confirm in qemu/compiler.h.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) ((sizeof(x) / sizeof((x)[0])) + \
QEMU_BUILD_BUG_ON_ZERO(!QEMU_IS_ARRAY(x)))
#endif
/*
 * Prototypes only; the file header says these live in os-*.c and
 * util/oslib-*.c.  The notes below are inferred from the signatures and
 * should be confirmed against the implementations.
 */
/* NOTE(review): presumably a portable wrapper around daemon(3). */
int qemu_daemon(int nochdir, int noclose);
/*
 * Aligned allocation of @size bytes.  NOTE(review): the "try" variant
 * presumably returns NULL on failure while qemu_memalign() does not
 * return on failure - confirm.
 */
void *qemu_try_memalign(size_t alignment, size_t size);
void *qemu_memalign(size_t alignment, size_t size);
/*
 * Anonymous RAM allocation.  @align is a pointer, so it is presumably an
 * output reporting the alignment actually used - confirm.
 */
void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
bool noreserve);
/* NOTE(review): presumably the release counterparts of the above. */
void qemu_vfree(void *ptr);
void qemu_anon_ram_free(void *ptr, size_t size);
/*
 * Abstraction of PROT_ and MAP_ flags as passed to mmap(), for example,
 * consumed by qemu_ram_mmap().  Each flag is a distinct bit, so they can
 * be combined with bitwise OR.
 */
/* Map PROT_READ instead of PROT_READ | PROT_WRITE. */
#define QEMU_MAP_READONLY (1 << 0)
/* Use MAP_SHARED instead of MAP_PRIVATE. */
#define QEMU_MAP_SHARED (1 << 1)
/*
 * Use MAP_SYNC | MAP_SHARED_VALIDATE if supported.  Ignored without
 * QEMU_MAP_SHARED.  If mapping fails, warn and fall back to
 * !QEMU_MAP_SYNC.
 */
#define QEMU_MAP_SYNC (1 << 2)
/*
 * Use MAP_NORESERVE to skip reservation of swap space (or huge pages if
 * applicable).  Bail out if not supported/effective.
 */
#define QEMU_MAP_NORESERVE (1 << 3)
Introduce qemu_madvise() vl.c has a Sun-specific hack to supply a prototype for madvise(), but the call site has apparently moved to arch_init.c. Haiku doesn't implement madvise() in favor of posix_madvise(). OpenBSD and Solaris 10 don't implement posix_madvise() but madvise(). MinGW implements neither. Check for madvise() and posix_madvise() in configure and supply qemu_madvise() as wrapper. Prefer madvise() over posix_madvise() due to flag availability. Convert all callers to use qemu_madvise() and QEMU_MADV_*. Note that on Solaris the warning is fixed by moving the madvise() prototype, not by qemu_madvise() itself. It helps with porting though, and it simplifies most call sites. v7 -> v8: * Some versions of MinGW have no sys/mman.h header. Reported by Blue Swirl. v6 -> v7: * Adopt madvise() rather than posix_madvise() semantics for returning errors. * Use EINVAL in place of ENOTSUP. v5 -> v6: * Replace two leftover instances of POSIX_MADV_NORMAL with QEMU_MADV_INVALID. Spotted by Blue Swirl. v4 -> v5: * Introduce QEMU_MADV_INVALID, suggested by Alexander Graf. Note that this relies on -1 not being a valid advice value. v3 -> v4: * Eliminate #ifdefs at qemu_advise() call sites. Requested by Blue Swirl. This will currently break the check in kvm-all.c by calling madvise() with a supported flag, which will not fail. Ideas/patches welcome. v2 -> v3: * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf. * Add configure check for madvise(), too. Add defines to Makefile, not QEMU_CFLAGS. Convert all callers, untested. Suggested by Blue Swirl. * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf. * Display configure check results. v1 -> v2: * Don't rely on posix_madvise() availability, add qemu_madvise(). Suggested by Blue Swirl. 
Signed-off-by: Andreas Färber <afaerber@opensolaris.org> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2010-09-25 13:26:05 +02:00
/*
 * Stand-in for an advice value the host cannot express.  This relies on
 * -1 not being a valid advice value on any host.
 *
 * Parenthesised so the expansion is a single operand wherever the macro
 * is used inside a larger expression.
 */
#define QEMU_MADV_INVALID (-1)
#if defined(CONFIG_MADVISE)
/*
 * madvise() is available: map each QEMU_MADV_* onto the host MADV_*
 * constant.  WILLNEED and DONTNEED are used unconditionally.  The other
 * advice values are optional; when the host headers do not define one,
 * the QEMU name becomes QEMU_MADV_INVALID.
 */
#define QEMU_MADV_WILLNEED MADV_WILLNEED
#define QEMU_MADV_DONTNEED MADV_DONTNEED
#ifdef MADV_DONTFORK
#define QEMU_MADV_DONTFORK MADV_DONTFORK
#else
#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
#endif
#ifdef MADV_MERGEABLE
#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
#else
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
#endif
#ifdef MADV_UNMERGEABLE
#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE
#else
#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
#endif
#ifdef MADV_DODUMP
#define QEMU_MADV_DODUMP MADV_DODUMP
#else
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#endif
#ifdef MADV_DONTDUMP
#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
#else
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#endif
#ifdef MADV_HUGEPAGE
#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
#else
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#endif
#ifdef MADV_NOHUGEPAGE
#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
#else
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#endif
/*
 * Unlike the optional values above, a missing MADV_REMOVE falls back to
 * QEMU_MADV_DONTNEED rather than to QEMU_MADV_INVALID.
 */
#ifdef MADV_REMOVE
#define QEMU_MADV_REMOVE MADV_REMOVE
#else
#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
#endif
Introduce qemu_madvise() vl.c has a Sun-specific hack to supply a prototype for madvise(), but the call site has apparently moved to arch_init.c. Haiku doesn't implement madvise() in favor of posix_madvise(). OpenBSD and Solaris 10 don't implement posix_madvise() but madvise(). MinGW implements neither. Check for madvise() and posix_madvise() in configure and supply qemu_madvise() as wrapper. Prefer madvise() over posix_madvise() due to flag availability. Convert all callers to use qemu_madvise() and QEMU_MADV_*. Note that on Solaris the warning is fixed by moving the madvise() prototype, not by qemu_madvise() itself. It helps with porting though, and it simplifies most call sites. v7 -> v8: * Some versions of MinGW have no sys/mman.h header. Reported by Blue Swirl. v6 -> v7: * Adopt madvise() rather than posix_madvise() semantics for returning errors. * Use EINVAL in place of ENOTSUP. v5 -> v6: * Replace two leftover instances of POSIX_MADV_NORMAL with QEMU_MADV_INVALID. Spotted by Blue Swirl. v4 -> v5: * Introduce QEMU_MADV_INVALID, suggested by Alexander Graf. Note that this relies on -1 not being a valid advice value. v3 -> v4: * Eliminate #ifdefs at qemu_advise() call sites. Requested by Blue Swirl. This will currently break the check in kvm-all.c by calling madvise() with a supported flag, which will not fail. Ideas/patches welcome. v2 -> v3: * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf. * Add configure check for madvise(), too. Add defines to Makefile, not QEMU_CFLAGS. Convert all callers, untested. Suggested by Blue Swirl. * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf. * Display configure check results. v1 -> v2: * Don't rely on posix_madvise() availability, add qemu_madvise(). Suggested by Blue Swirl. 
Signed-off-by: Andreas Färber <afaerber@opensolaris.org> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2010-09-25 13:26:05 +02:00
#elif defined(CONFIG_POSIX_MADVISE)
/*
 * Only posix_madvise() is available: WILLNEED and DONTNEED map onto the
 * POSIX_MADV_* constants, REMOVE is treated as DONTNEED, and every other
 * advice value is QEMU_MADV_INVALID.
 */
#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED
#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED
#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
Introduce qemu_madvise() vl.c has a Sun-specific hack to supply a prototype for madvise(), but the call site has apparently moved to arch_init.c. Haiku doesn't implement madvise() in favor of posix_madvise(). OpenBSD and Solaris 10 don't implement posix_madvise() but madvise(). MinGW implements neither. Check for madvise() and posix_madvise() in configure and supply qemu_madvise() as wrapper. Prefer madvise() over posix_madvise() due to flag availability. Convert all callers to use qemu_madvise() and QEMU_MADV_*. Note that on Solaris the warning is fixed by moving the madvise() prototype, not by qemu_madvise() itself. It helps with porting though, and it simplifies most call sites. v7 -> v8: * Some versions of MinGW have no sys/mman.h header. Reported by Blue Swirl. v6 -> v7: * Adopt madvise() rather than posix_madvise() semantics for returning errors. * Use EINVAL in place of ENOTSUP. v5 -> v6: * Replace two leftover instances of POSIX_MADV_NORMAL with QEMU_MADV_INVALID. Spotted by Blue Swirl. v4 -> v5: * Introduce QEMU_MADV_INVALID, suggested by Alexander Graf. Note that this relies on -1 not being a valid advice value. v3 -> v4: * Eliminate #ifdefs at qemu_advise() call sites. Requested by Blue Swirl. This will currently break the check in kvm-all.c by calling madvise() with a supported flag, which will not fail. Ideas/patches welcome. v2 -> v3: * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf. * Add configure check for madvise(), too. Add defines to Makefile, not QEMU_CFLAGS. Convert all callers, untested. Suggested by Blue Swirl. * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf. * Display configure check results. v1 -> v2: * Don't rely on posix_madvise() availability, add qemu_madvise(). Suggested by Blue Swirl. 
Signed-off-by: Andreas Färber <afaerber@opensolaris.org> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2010-09-25 13:26:05 +02:00
#else /* no-op */
/*
 * Neither madvise() nor posix_madvise() is available: every advice
 * value, REMOVE included, is QEMU_MADV_INVALID.
 */
#define QEMU_MADV_WILLNEED QEMU_MADV_INVALID
#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID
#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
Introduce qemu_madvise() vl.c has a Sun-specific hack to supply a prototype for madvise(), but the call site has apparently moved to arch_init.c. Haiku doesn't implement madvise() in favor of posix_madvise(). OpenBSD and Solaris 10 don't implement posix_madvise() but madvise(). MinGW implements neither. Check for madvise() and posix_madvise() in configure and supply qemu_madvise() as wrapper. Prefer madvise() over posix_madvise() due to flag availability. Convert all callers to use qemu_madvise() and QEMU_MADV_*. Note that on Solaris the warning is fixed by moving the madvise() prototype, not by qemu_madvise() itself. It helps with porting though, and it simplifies most call sites. v7 -> v8: * Some versions of MinGW have no sys/mman.h header. Reported by Blue Swirl. v6 -> v7: * Adopt madvise() rather than posix_madvise() semantics for returning errors. * Use EINVAL in place of ENOTSUP. v5 -> v6: * Replace two leftover instances of POSIX_MADV_NORMAL with QEMU_MADV_INVALID. Spotted by Blue Swirl. v4 -> v5: * Introduce QEMU_MADV_INVALID, suggested by Alexander Graf. Note that this relies on -1 not being a valid advice value. v3 -> v4: * Eliminate #ifdefs at qemu_advise() call sites. Requested by Blue Swirl. This will currently break the check in kvm-all.c by calling madvise() with a supported flag, which will not fail. Ideas/patches welcome. v2 -> v3: * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf. * Add configure check for madvise(), too. Add defines to Makefile, not QEMU_CFLAGS. Convert all callers, untested. Suggested by Blue Swirl. * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf. * Display configure check results. v1 -> v2: * Don't rely on posix_madvise() availability, add qemu_madvise(). Suggested by Blue Swirl. 
Signed-off-by: Andreas Färber <afaerber@opensolaris.org> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2010-09-25 13:26:05 +02:00
#endif
/*
 * HAVE_CHARDEV_SERIAL is set to 1 on Windows and on each of the
 * Unix-like hosts listed below; it is left undefined everywhere else.
 */
#if defined(_WIN32) \
    || defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
    || defined(__GLIBC__) || defined(__APPLE__)
#define HAVE_CHARDEV_SERIAL 1
#endif
/*
 * HAVE_CHARDEV_PARPORT is set to 1 on Linux, FreeBSD (including a
 * FreeBSD kernel with another userland) and DragonFly only.
 */
#if defined(__linux__) || defined(__FreeBSD__) || \
defined(__FreeBSD_kernel__) || defined(__DragonFly__)
#define HAVE_CHARDEV_PARPORT 1
#endif
/* On Haiku, SIGIO is provided as an alias for SIGPOLL. */
#if defined(__HAIKU__)
#define SIGIO SIGPOLL
#endif
/*
 * Fallback definitions for Linux hosts whose headers lack the
 * BUS_MCEERR_* codes.  NOTE(review): 4 and 5 presumably match the
 * kernel's SIGBUS si_code values - confirm against the kernel headers.
 */
#if defined(CONFIG_LINUX)
#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5
#endif
#endif
/*
 * QEMU_VMALLOC_ALIGN: alignment chosen per host.  Only the first two
 * branches are compile-time constants; the other two read
 * qemu_real_host_page_size at run time.
 */
#if defined(__linux__) && \
(defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) \
|| defined(__powerpc64__))
/*
 * Use 2 MiB alignment so transparent hugepages can be used by KVM.
 * Valgrind does not support alignments larger than 1 MiB,
 * therefore we need special code which handles running on Valgrind.
 */
# define QEMU_VMALLOC_ALIGN (512 * 4096)
#elif defined(__linux__) && defined(__s390x__)
/* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
# define QEMU_VMALLOC_ALIGN (256 * 4096)
#elif defined(__linux__) && defined(__sparc__)
/* Linux/sparc: the larger of the host page size and SHMLBA. */
# define QEMU_VMALLOC_ALIGN MAX(qemu_real_host_page_size, SHMLBA)
#else
/* Everything else: the host page size. */
# define QEMU_VMALLOC_ALIGN qemu_real_host_page_size
#endif
#ifdef CONFIG_POSIX
/*
 * Fixed-size record describing one received signal.
 *
 * Only fixed-width integer types are used, and the trailing pad brings
 * the structure up to 128 bytes so that fields can be added later
 * without changing its size.
 */
struct qemu_signalfd_siginfo {
    /* Signal number */
    uint32_t ssi_signo;
    /* Error number (unused) */
    int32_t  ssi_errno;
    /* Signal code */
    int32_t  ssi_code;
    /* PID of sender */
    uint32_t ssi_pid;
    /* Real UID of sender */
    uint32_t ssi_uid;
    /* File descriptor (SIGIO) */
    int32_t  ssi_fd;
    /* Kernel timer ID (POSIX timers) */
    uint32_t ssi_tid;
    /* Band event (SIGIO) */
    uint32_t ssi_band;
    /* POSIX timer overrun count */
    uint32_t ssi_overrun;
    /* Trap number that caused signal */
    uint32_t ssi_trapno;
    /* Exit status or signal (SIGCHLD) */
    int32_t  ssi_status;
    /* Integer sent by sigqueue(2) */
    int32_t  ssi_int;
    /* Pointer sent by sigqueue(2) */
    uint64_t ssi_ptr;
    /* User CPU time consumed (SIGCHLD) */
    uint64_t ssi_utime;
    /* System CPU time consumed (SIGCHLD) */
    uint64_t ssi_stime;
    /* Address that generated signal (for hardware-generated signals) */
    uint64_t ssi_addr;
    /* Pad size to 128 bytes (allow for additional fields in the future) */
    uint8_t  pad[48];
};
/*
 * NOTE(review): presumably returns a file descriptor on which the
 * signals in @mask can be read as qemu_signalfd_siginfo records -
 * confirm in the implementation.
 */
int qemu_signalfd(const sigset_t *mask);
/*
 * NOTE(review): presumably calls the handler stored in @action with the
 * signal described by @info - confirm in the implementation.
 */
void sigaction_invoke(struct sigaction *action,
struct qemu_signalfd_siginfo *info);
#endif
Introduce qemu_madvise() vl.c has a Sun-specific hack to supply a prototype for madvise(), but the call site has apparently moved to arch_init.c. Haiku doesn't implement madvise() in favor of posix_madvise(). OpenBSD and Solaris 10 don't implement posix_madvise() but madvise(). MinGW implements neither. Check for madvise() and posix_madvise() in configure and supply qemu_madvise() as wrapper. Prefer madvise() over posix_madvise() due to flag availability. Convert all callers to use qemu_madvise() and QEMU_MADV_*. Note that on Solaris the warning is fixed by moving the madvise() prototype, not by qemu_madvise() itself. It helps with porting though, and it simplifies most call sites. v7 -> v8: * Some versions of MinGW have no sys/mman.h header. Reported by Blue Swirl. v6 -> v7: * Adopt madvise() rather than posix_madvise() semantics for returning errors. * Use EINVAL in place of ENOTSUP. v5 -> v6: * Replace two leftover instances of POSIX_MADV_NORMAL with QEMU_MADV_INVALID. Spotted by Blue Swirl. v4 -> v5: * Introduce QEMU_MADV_INVALID, suggested by Alexander Graf. Note that this relies on -1 not being a valid advice value. v3 -> v4: * Eliminate #ifdefs at qemu_advise() call sites. Requested by Blue Swirl. This will currently break the check in kvm-all.c by calling madvise() with a supported flag, which will not fail. Ideas/patches welcome. v2 -> v3: * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf. * Add configure check for madvise(), too. Add defines to Makefile, not QEMU_CFLAGS. Convert all callers, untested. Suggested by Blue Swirl. * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf. * Display configure check results. v1 -> v2: * Don't rely on posix_madvise() availability, add qemu_madvise(). Suggested by Blue Swirl. 
Signed-off-by: Andreas Färber <afaerber@opensolaris.org> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2010-09-25 13:26:05 +02:00
/*
 * Portable madvise()/posix_madvise() wrapper; @advice is one of the
 * QEMU_MADV_* values.  NOTE(review): expected to fail for
 * QEMU_MADV_INVALID with madvise()-style error reporting - confirm in
 * the implementation.
 */
int qemu_madvise(void *addr, size_t len, int advice);
/*
 * Change the protection of [@addr, @addr + @size).  NOTE(review): judging
 * by the names, to read+write, read+write+execute and no access
 * respectively - confirm.
 */
int qemu_mprotect_rw(void *addr, size_t size);
int qemu_mprotect_rwx(void *addr, size_t size);
int qemu_mprotect_none(void *addr, size_t size);
Introduce qemu_madvise() vl.c has a Sun-specific hack to supply a prototype for madvise(), but the call site has apparently moved to arch_init.c. Haiku doesn't implement madvise() in favor of posix_madvise(). OpenBSD and Solaris 10 don't implement posix_madvise() but madvise(). MinGW implements neither. Check for madvise() and posix_madvise() in configure and supply qemu_madvise() as wrapper. Prefer madvise() over posix_madvise() due to flag availability. Convert all callers to use qemu_madvise() and QEMU_MADV_*. Note that on Solaris the warning is fixed by moving the madvise() prototype, not by qemu_madvise() itself. It helps with porting though, and it simplifies most call sites. v7 -> v8: * Some versions of MinGW have no sys/mman.h header. Reported by Blue Swirl. v6 -> v7: * Adopt madvise() rather than posix_madvise() semantics for returning errors. * Use EINVAL in place of ENOTSUP. v5 -> v6: * Replace two leftover instances of POSIX_MADV_NORMAL with QEMU_MADV_INVALID. Spotted by Blue Swirl. v4 -> v5: * Introduce QEMU_MADV_INVALID, suggested by Alexander Graf. Note that this relies on -1 not being a valid advice value. v3 -> v4: * Eliminate #ifdefs at qemu_advise() call sites. Requested by Blue Swirl. This will currently break the check in kvm-all.c by calling madvise() with a supported flag, which will not fail. Ideas/patches welcome. v2 -> v3: * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf. * Add configure check for madvise(), too. Add defines to Makefile, not QEMU_CFLAGS. Convert all callers, untested. Suggested by Blue Swirl. * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf. * Display configure check results. v1 -> v2: * Don't rely on posix_madvise() availability, add qemu_madvise(). Suggested by Blue Swirl. 
Signed-off-by: Andreas Färber <afaerber@opensolaris.org> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2010-09-25 13:26:05 +02:00
/*
 * Don't introduce new usage of this function, prefer the following
 * qemu_open/qemu_create that take an "Error **errp"
 */
int qemu_open_old(const char *name, int flags, ...);
/*
 * Open or create @name, reporting failure through @errp.
 * NOTE(review): return value presumably follows open(2) (descriptor or
 * negative on failure) - confirm.
 */
int qemu_open(const char *name, int flags, Error **errp);
int qemu_create(const char *name, int flags, mode_t mode, Error **errp);
/* NOTE(review): presumably the counterparts of close(2) and unlink(2). */
int qemu_close(int fd);
int qemu_unlink(const char *name);
/* Descriptor duplication and byte-range locking: not declared on Windows. */
#ifndef _WIN32
int qemu_dup_flags(int fd, int flags);
int qemu_dup(int fd);
/*
 * Lock, unlock or probe the byte range [@start, @start + @len) of @fd.
 * NOTE(review): @exclusive presumably selects a write lock over a read
 * lock - confirm.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive);
int qemu_unlock_fd(int fd, int64_t start, int64_t len);
int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
/* NOTE(review): presumably reports whether OFD locks are usable. */
bool qemu_has_ofd_lock(void);
#endif
/*
 * printf() conversion specification for process IDs.
 *
 * 64-bit Windows is detected through _WIN64, the compiler-reserved macro
 * that is defined regardless of language mode.  The unprefixed WIN64 is
 * kept for backward compatibility but is only predefined by GCC in GNU
 * (non strict ISO) modes.
 */
#if defined(__HAIKU__) && defined(__i386__)
#define FMT_pid "%ld"
#elif defined(_WIN64) || defined(WIN64)
#define FMT_pid "%" PRId64
#else
#define FMT_pid "%d"
#endif
util: add qemu_write_pidfile() There are variants of qemu_create_pidfile() in qemu-pr-helper and qemu-ga. Let's have a common implementation in libqemuutil. The code is initially based from pr-helper write_pidfile(), with various improvements and suggestions from Daniel Berrangé: QEMU will leave the pidfile existing on disk when it exits which initially made me think it avoids the deletion race. The app managing QEMU, however, may well delete the pidfile after it has seen QEMU exit, and even if the app locks the pidfile before deleting it, there is still a race. eg consider the following sequence QEMU 1 libvirtd QEMU 2 1. lock(pidfile) 2. exit() 3. open(pidfile) 4. lock(pidfile) 5. open(pidfile) 6. unlink(pidfile) 7. close(pidfile) 8. lock(pidfile) IOW, at step 8 the new QEMU has successfully acquired the lock, but the pidfile no longer exists on disk because it was deleted after the original QEMU exited. While we could just say no external app should ever delete the pidfile, I don't think that is satisfactory as people don't read docs, and admins don't like stale pidfiles being left around on disk. To make this robust, I think we might want to copy libvirt's approach to pidfile acquisition which runs in a loop and checks that the file on disk /after/ acquiring the lock matches the file that was locked. Then we could in fact safely let QEMU delete its own pidfiles on clean exit.. Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <20180831145314.14736-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-08-31 16:53:12 +02:00
/*
 * Write the PID file @pidfile, reporting failure through @errp.
 * NOTE(review): presumably returns true on success - confirm.
 */
bool qemu_write_pidfile(const char *pidfile, Error **errp);
/* NOTE(review): presumably an identifier for the calling thread. */
int qemu_get_thread_id(void);
#ifndef CONFIG_IOVEC
/*
 * Declarations used when CONFIG_IOVEC is not defined: a scatter/gather
 * element (base address plus length), the limit on the number of
 * elements, and the two vectored I/O entry points.
 */
struct iovec {
    void   *iov_base;
    size_t  iov_len;
};

/*
 * Use the same value as Linux for now.
 */
#define IOV_MAX 1024

ssize_t readv(int fd, const struct iovec *iov, int iov_cnt);
ssize_t writev(int fd, const struct iovec *iov, int iov_cnt);
#endif
#ifdef _WIN32
/*
 * Store @val1 - @val2 in @res.
 *
 * When the microsecond part of @val1 is smaller than that of @val2, one
 * second is borrowed so the stored microsecond difference is not
 * negative.
 */
static inline void qemu_timersub(const struct timeval *val1,
                                 const struct timeval *val2,
                                 struct timeval *res)
{
    /* Decide on the borrow before anything is written through @res. */
    const int borrow = val1->tv_usec < val2->tv_usec;

    res->tv_sec = val1->tv_sec - val2->tv_sec - borrow;
    res->tv_usec = val1->tv_usec - val2->tv_usec + (borrow ? 1000 * 1000 : 0);
}
#else
#define qemu_timersub timersub
#endif
/* NOTE(review): presumably sets the close-on-exec flag on @fd - confirm. */
void qemu_set_cloexec(int fd);
/*
 * Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
 * instead of QEMU_VERSION, so setting hw_version on MachineClass
 * is no longer mandatory.
 *
 * Do NOT change this string, or it will break compatibility on all
 * machine classes that don't set hw_version.
 */
#define QEMU_HW_VERSION "2.5+"
/*
 * QEMU "hardware version" setting.  Used to replace code that exposed
 * QEMU_VERSION to guests in the past and needs to keep compatibility.
 * Do not use qemu_hw_version() in new code.
 */
void qemu_set_hw_version(const char *);
const char *qemu_hw_version(void);
/*
 * Setter and getter for the FIPS state.  NOTE(review): what @requested
 * enables is not visible from this header - see the implementation.
 */
void fips_set_state(bool requested);
bool fips_get_state(void);
/*
 * Return a dynamically allocated pathname denoting a file or directory that is
 * appropriate for storing local state.
 *
 * @relative_pathname need not start with a directory separator; one will be
 * added automatically.
 *
 * The caller is responsible for releasing the value returned with g_free()
 * after use.
 */
char *qemu_get_local_state_pathname(const char *relative_pathname);
/*
 * Find the program directory, and save it for later use with
 * qemu_get_exec_dir().
 * The OS-specific API is tried first; if that does not work, the
 * directory is parsed from @argv0.
 */
void qemu_init_exec_dir(const char *argv0);
/* Get the saved exec dir. */
const char *qemu_get_exec_dir(void);
/**
 * qemu_getauxval:
 * @type: the auxiliary vector key to lookup
 *
 * Search the auxiliary vector for @type, returning the value
 * or 0 if @type is not present.
 */
unsigned long qemu_getauxval(unsigned long type);
/*
 * NOTE(review): presumably switches terminal echo on (@echo true) or off
 * for the terminal referred to by @fd - confirm in the implementation.
 */
void qemu_set_tty_echo(int fd, bool echo);
mem-prealloc: reduce large guest start-up and migration time. Using "-mem-prealloc" option for a large guest leads to higher guest start-up and migration time. This is because with "-mem-prealloc" option qemu tries to map every guest page (create address translations), and make sure the pages are available during runtime. virsh/libvirt by default, seems to use "-mem-prealloc" option in case the guest is configured to use huge pages. The patch tries to map all guest pages simultaneously by spawning multiple threads. Currently limiting the change to QEMU library functions on POSIX compliant host only, as we are not sure if the problem exists on win32. Below are some stats with "-mem-prealloc" option for guest configured to use huge pages. ------------------------------------------------------------------------ Idle Guest | Start-up time | Migration time ------------------------------------------------------------------------ Guest stats with 2M HugePage usage - single threaded (existing code) ------------------------------------------------------------------------ 64 Core - 4TB | 54m11.796s | 75m43.843s 64 Core - 1TB | 8m56.576s | 14m29.049s 64 Core - 256GB | 2m11.245s | 3m26.598s ------------------------------------------------------------------------ Guest stats with 2M HugePage usage - map guest pages using 8 threads ------------------------------------------------------------------------ 64 Core - 4TB | 5m1.027s | 34m10.565s 64 Core - 1TB | 1m10.366s | 8m28.188s 64 Core - 256GB | 0m19.040s | 2m10.148s ----------------------------------------------------------------------- Guest stats with 2M HugePage usage - map guest pages using 16 threads ----------------------------------------------------------------------- 64 Core - 4TB | 1m58.970s | 31m43.400s 64 Core - 1TB | 0m39.885s | 7m55.289s 64 Core - 256GB | 0m11.960s | 2m0.135s ----------------------------------------------------------------------- Changed in v2: - modify number of memset threads spawned to 
min(smp_cpus, 16). - removed 64GB memory restriction for spawning memset threads. Changed in v3: - limit number of threads spawned based on min(sysconf(_SC_NPROCESSORS_ONLN), 16, smp_cpus) - implement memset thread specific siglongjmp in SIGBUS signal_handler. Changed in v4 - remove sigsetjmp/siglongjmp and SIGBUS unblock/block for main thread as main thread no longer touches any pages. - simplify code my returning memset_thread_failed status from touch_all_pages. Signed-off-by: Jitendra Kolhe <jitendra.kolhe@hpe.com> Message-Id: <1487907103-32350-1-git-send-email-jitendra.kolhe@hpe.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-24 04:31:43 +01:00
/*
 * Preallocate the @sz bytes of memory starting at @area, reporting
 * failure through @errp.
 * NOTE(review): @fd is presumably the descriptor backing the memory and
 * @smp_cpus presumably bounds the number of worker threads used -
 * confirm in the implementation.
 */
void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
Error **errp);
/**
 * qemu_get_pid_name:
 * @pid: pid of a process
 *
 * For given @pid fetch its name. Caller is responsible for
 * freeing the string when no longer needed.
 *
 * Returns allocated string on success, NULL on failure.
 */
char *qemu_get_pid_name(pid_t pid);
/**
 * qemu_fork:
 * @errp: Error object (NOTE(review): presumably set when -1 is
 *        returned - confirm)
 *
 * A version of fork that avoids signal handler race
 * conditions that can lead to child process getting
 * signals that are otherwise only expected by the
 * parent. It also resets all signal handlers to the
 * default settings.
 *
 * Returns 0 to child process, pid number to parent
 * or -1 on failure.
 */
pid_t qemu_fork(Error **errp);
/*
 * Host page size and the matching mask.
 *
 * Using intptr_t ensures that qemu_*_page_mask is sign-extended even
 * when intptr_t is 32-bit and we are aligning a long long.
 */
extern uintptr_t qemu_real_host_page_size;
extern intptr_t qemu_real_host_page_mask;
/*
 * Host instruction and data cache line sizes.
 * NOTE(review): the *_log variants are presumably log2 of the matching
 * size - confirm where they are initialised.
 */
extern int qemu_icache_linesize;
extern int qemu_icache_linesize_log;
extern int qemu_dcache_linesize;
extern int qemu_dcache_linesize_log;
qemu-io: Add generic function for reinitializing optind. On FreeBSD 11.2: $ nbdkit memory size=1M --run './qemu-io -f raw -c "aio_write 0 512" $nbd' Parsing error: non-numeric argument, or extraneous/unrecognized suffix -- aio_write After main option parsing, we reinitialize optind so we can parse each command. However reinitializing optind to 0 does not work on FreeBSD. What happens when you do this is optind remains 0 after the option parsing loop, and the result is we try to parse argv[optind] == argv[0] == "aio_write" as if it was the first parameter. The FreeBSD manual page says: In order to use getopt() to evaluate multiple sets of arguments, or to evaluate a single set of arguments multiple times, the variable optreset must be set to 1 before the second and each additional set of calls to getopt(), and the variable optind must be reinitialized. (From the rest of the man page it is clear that optind must be reinitialized to 1). The glibc man page says: A program that scans multiple argument vectors, or rescans the same vector more than once, and wants to make use of GNU extensions such as '+' and '-' at the start of optstring, or changes the value of POSIXLY_CORRECT between scans, must reinitialize getopt() by resetting optind to 0, rather than the traditional value of 1. (Resetting to 0 forces the invocation of an internal initialization routine that rechecks POSIXLY_CORRECT and checks for GNU extensions in optstring.) This commit introduces an OS-portability function called qemu_reset_optind which provides a way of resetting optind that works on FreeBSD and platforms that use optreset, while keeping it the same as now on other platforms. Note that the qemu codebase sets optind in many other places, but in those other places it's setting a local variable and not using getopt. This change is only needed in places where we are using getopt and the associated global variable optind. Signed-off-by: Richard W.M. 
Jones <rjones@redhat.com> Message-id: 20190118101114.11759-2-rjones@redhat.com Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-01-18 11:11:14 +01:00
/*
 * Prepare getopt()/getopt_long() for parsing another set of options.
 *
 * How optind must be reset differs between getopt implementations:
 *  - without optreset, optind is set to 0;
 *  - with optreset (HAVE_OPTRESET), optind is set to 1 and optreset
 *    is raised as well.
 */
static inline void qemu_reset_optind(void)
{
#ifndef HAVE_OPTRESET
    optind = 0;
#else
    optreset = 1;
    optind = 1;
#endif
}
/**
 * qemu_get_host_name:
 * @errp: Error object
 *
 * Operating system agnostic way of querying host name.
 *
 * Returns allocated hostname (caller should free), NULL on failure.
 */
char *qemu_get_host_name(Error **errp);
/**
 * qemu_get_host_physmem:
 *
 * Operating system agnostic way of querying host memory.
 *
 * Returns amount of physical memory on the system. This is purely
 * advisory and may return 0 if we can't work it out. At the other
 * end we saturate to SIZE_MAX if you are lucky enough to have that
 * much memory.
 */
size_t qemu_get_host_physmem(void);
/*
 * Toggle write/execute on the pages marked MAP_JIT
 * for the current thread.
 *
 * The real implementation is only built against a macOS SDK of version
 * 11.0 or later, and only acts when running on macOS 11.0 or later.
 * Everywhere else both functions are empty.
 */
#if defined(MAC_OS_VERSION_11_0) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
/* Turn JIT write protection on for the current thread. */
static inline void qemu_thread_jit_execute(void)
{
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(true);
}
}
/* Turn JIT write protection off for the current thread. */
static inline void qemu_thread_jit_write(void)
{
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(false);
}
}
#else
/* No per-thread JIT write protection to toggle: no-ops. */
static inline void qemu_thread_jit_write(void) {}
static inline void qemu_thread_jit_execute(void) {}
#endif
/**
 * Replacement for system() when HAVE_SYSTEM_FUNCTION is not defined.
 *
 * Every later use of system() in the translation unit is redirected to
 * this stub, which runs nothing: it sets errno to ENOSYS and returns -1.
 */
#ifndef HAVE_SYSTEM_FUNCTION
#define system platform_does_not_support_system
static inline int platform_does_not_support_system(const char *command)
{
    (void)command; /* never inspected: nothing is executed */
    errno = ENOSYS;
    return -1;
}
#endif /* !HAVE_SYSTEM_FUNCTION */
#ifdef __cplusplus
}
#endif
#endif