606600a176
If a socket is closed, it remains in TIME_WAIT state for some time. On operating systems using BSD sockets, the endpoint of the socket may not be reused while in this state unless SO_REUSEADDR was set on the socket. On Windows, on the other hand, the default behaviour is to allow reuse (i.e. identical to SO_REUSEADDR on other operating systems), and setting SO_REUSEADDR on a socket allows it to be bound to an endpoint even if the endpoint is already used by another socket, independently of the other socket's state. This can even result in undefined behaviour. Many sockets used by QEMU should not block the use of their endpoint after being closed while they are still in TIME_WAIT state. Currently QEMU sets SO_REUSEADDR for such sockets, which can lead to problems on Windows. This patch introduces the function socket_set_fast_reuse, which should be used instead of setting SO_REUSEADDR when fast socket reuse is desired and which behaves correctly on all operating systems. As a failure of this function can only be caused by bad QEMU internal errors, an assertion handles these situations. The return value is still passed on, to minimize changes in client code and prevent unused variable warnings if NDEBUG is defined. Signed-off-by: Sebastian Ottlik <ottlik@fzi.de> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Stefan Weil <sw@weilnetz.de>
254 lines
6.4 KiB
C
254 lines
6.4 KiB
C
/*
|
|
* os-posix-lib.c
|
|
*
|
|
* Copyright (c) 2003-2008 Fabrice Bellard
|
|
* Copyright (c) 2010 Red Hat, Inc.
|
|
*
|
|
* QEMU library functions on POSIX which are shared between QEMU and
|
|
* the QEMU tools.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
/* The following block of code temporarily renames the daemon() function so the
|
|
compiler does not see the warning associated with it in stdlib.h on OSX */
|
|
#ifdef __APPLE__
|
|
#define daemon qemu_fake_daemon_function
|
|
#include <stdlib.h>
|
|
#undef daemon
|
|
extern int daemon(int, int);
|
|
#endif
|
|
|
|
/*
 * QEMU_VMALLOC_ALIGN: alignment requested by qemu_anon_ram_alloc().
 *
 *  - Linux on x86_64 or ARM: 2 MiB, so transparent hugepages can be used
 *    by KVM.  Valgrind does not support alignments larger than 1 MiB.
 *    NOTE(review): the original comment mentions special Valgrind handling;
 *    confirm where that code lives.
 *  - Linux on s390x: 1 MiB (segment size), so gmap can be used by KVM.
 *  - everything else: the host page size.
 */
#ifdef __linux__
# if defined(__x86_64__) || defined(__arm__)
#  define QEMU_VMALLOC_ALIGN (2 * 1024 * 1024)
# elif defined(__s390x__)
#  define QEMU_VMALLOC_ALIGN (1024 * 1024)
# else
#  define QEMU_VMALLOC_ALIGN getpagesize()
# endif
#else
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
|
|
|
|
#include <glib/gprintf.h>
|
|
|
|
#include "config-host.h"
|
|
#include "sysemu/sysemu.h"
|
|
#include "trace.h"
|
|
#include "qemu/sockets.h"
|
|
#include <sys/mman.h>
|
|
|
|
#ifdef CONFIG_LINUX
|
|
#include <sys/syscall.h>
|
|
#endif
|
|
|
|
/*
 * Return an identifier for the calling thread.
 *
 * On Linux this is the value of the gettid system call; on every other
 * host the process id from getpid() is returned instead.
 */
int qemu_get_thread_id(void)
{
    int tid;

#if defined(__linux__)
    tid = syscall(SYS_gettid);
#else
    tid = getpid();
#endif

    return tid;
}
|
|
|
|
/*
 * Thin wrapper around the C library's daemon().
 *
 * Both arguments are forwarded unchanged and the result of daemon() is
 * returned as is.
 */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
|
|
|
|
/*
 * Pass an allocation result through, aborting if it is NULL.
 *
 * Returns ptr unchanged when it is non-NULL.  Otherwise a message including
 * strerror(errno) is written to stderr and the process is aborted.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
|
|
|
|
/*
 * Allocate size bytes of aligned memory, aborting on failure.
 *
 * alignment: requested alignment in bytes.  It is honoured by the
 *            posix_memalign() and memalign() variants; the CONFIG_BSD
 *            variant uses valloc() and ignores it.
 * size:      number of bytes to allocate.
 *
 * Returns the allocated pointer; this function does not return on
 * allocation failure.  The result is reported via trace_qemu_memalign().
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *ptr;
#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    int ret;

    /*
     * posix_memalign() requires the alignment to be a multiple of
     * sizeof(void *) and fails with EINVAL otherwise.  Round small requests
     * up: a larger alignment still satisfies the caller, and a small
     * alignment no longer aborts the process.
     */
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }

    ret = posix_memalign(&ptr, alignment, size);
    if (ret != 0) {
        /* posix_memalign() returns the error code instead of setting errno */
        fprintf(stderr, "Failed to allocate %zu B: %s\n",
                size, strerror(ret));
        abort();
    }
#elif defined(CONFIG_BSD)
    ptr = qemu_oom_check(valloc(size));
#else
    ptr = qemu_oom_check(memalign(alignment, size));
#endif
    trace_qemu_memalign(alignment, size, ptr);
    return ptr;
}
|
|
|
|
/* alloc shared memory pages */
|
|
void *qemu_anon_ram_alloc(size_t size)
|
|
{
|
|
size_t align = QEMU_VMALLOC_ALIGN;
|
|
size_t total = size + align - getpagesize();
|
|
void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
|
|
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
|
size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
|
|
|
|
if (ptr == MAP_FAILED) {
|
|
return NULL;
|
|
}
|
|
|
|
ptr += offset;
|
|
total -= offset;
|
|
|
|
if (offset > 0) {
|
|
munmap(ptr - offset, offset);
|
|
}
|
|
if (total > size) {
|
|
munmap(ptr + size, total - size);
|
|
}
|
|
|
|
trace_qemu_anon_ram_alloc(size, ptr);
|
|
return ptr;
|
|
}
|
|
|
|
/*
 * Release memory with free().
 *
 * The pointer is reported via trace_qemu_vfree() before it is freed.
 */
void qemu_vfree(void *ptr)
{
    /* Trace first, while the pointer is still valid. */
    trace_qemu_vfree(ptr);

    free(ptr);
}
|
|
|
|
/*
 * Unmap size bytes starting at ptr.
 *
 * The call is always traced; a NULL ptr is otherwise a no-op.
 */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);

    if (!ptr) {
        return;
    }

    munmap(ptr, size);
}
|
|
|
|
/*
 * Put a file descriptor into blocking mode by clearing O_NONBLOCK.
 *
 * All other file status flags are preserved.  The operation is best effort:
 * the function returns void, so failures are not reported to the caller.
 */
void qemu_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    /*
     * If the current flags cannot be read, do not write anything back:
     * -1 has every bit set and would request unrelated status flags.
     */
    if (flags == -1) {
        return;
    }

    fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
|
|
|
|
/*
 * Put a file descriptor into non-blocking mode by setting O_NONBLOCK.
 *
 * All other file status flags are preserved.  The operation is best effort:
 * the function returns void, so failures are not reported to the caller.
 */
void qemu_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    /*
     * If the current flags cannot be read, do not write anything back:
     * -1 has every bit set and would request unrelated status flags.
     */
    if (flags == -1) {
        return;
    }

    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
|
|
|
|
/*
 * Allow the socket's local endpoint to be reused quickly after the socket
 * is closed, by enabling SO_REUSEADDR on it.
 *
 * A failing setsockopt() trips an assertion.  The result is still returned
 * so that callers have a value to use when assertions are compiled out
 * (NDEBUG).
 *
 * Returns the result of setsockopt(): 0 on success.
 */
int socket_set_fast_reuse(int fd)
{
    int enable = 1;
    int rc;

    rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable));
    assert(rc == 0);

    return rc;
}
|
|
|
|
/*
 * Mark a file descriptor close-on-exec by setting FD_CLOEXEC.
 *
 * Other descriptor flags are preserved.  The operation is best effort:
 * the function returns void, so failures are not reported to the caller.
 */
void qemu_set_cloexec(int fd)
{
    int flags = fcntl(fd, F_GETFD);

    /*
     * If the current flags cannot be read, do not write anything back:
     * -1 | FD_CLOEXEC would request every descriptor flag bit.
     */
    if (flags == -1) {
        return;
    }

    fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
|
|
|
|
/*
 * Create a pipe whose two file descriptors both have FD_CLOEXEC set.
 *
 * When CONFIG_PIPE2 is defined, pipe2(O_CLOEXEC) is tried first; its result
 * is returned unless it failed with ENOSYS.  Otherwise (or after ENOSYS)
 * the pipe is created with pipe() and each end is passed to
 * qemu_set_cloexec().
 *
 * Returns the result of pipe2()/pipe(): 0 on success, -1 on failure.
 */
int qemu_pipe(int pipefd[2])
{
    int rc;

#ifdef CONFIG_PIPE2
    rc = pipe2(pipefd, O_CLOEXEC);
    if (!(rc == -1 && errno == ENOSYS)) {
        return rc;
    }
#endif

    rc = pipe(pipefd);
    if (rc != 0) {
        return rc;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return rc;
}
|
|
|
|
/*
 * Set the access and modification times of path.
 *
 * path:  file whose timestamps are changed.
 * times: two entries, [0] = access time, [1] = modification time.  A
 *        tv_nsec of UTIME_NOW selects the current time and UTIME_OMIT
 *        leaves that timestamp as it is.
 *
 * When CONFIG_UTIMENSAT is defined, utimensat() with AT_SYMLINK_NOFOLLOW is
 * tried first and its result is returned unless it failed with ENOSYS.
 * Otherwise the call is emulated with utimes(), which only has microsecond
 * resolution.  In the emulation an omitted timestamp is re-applied from
 * stat() truncated to whole seconds.
 *
 * Returns 0 on success, -1 on failure with errno set by the failing call.
 */
int qemu_utimens(const char *path, const struct timespec *times)
{
    struct timeval tv[2], tv_now;
    struct stat st;
    int i;
#ifdef CONFIG_UTIMENSAT
    int ret;

    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
    if (ret != -1 || errno != ENOSYS) {
        return ret;
    }
#endif
    /* Fallback: use utimes() instead of utimensat() */

    /* happy if special cases */
    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
        return 0;
    }
    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
        return utimes(path, NULL);
    }

    /* prepare for hard cases */
    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
        /* Without a valid current time tv_now would be read uninitialised */
        if (gettimeofday(&tv_now, NULL) != 0) {
            return -1;
        }
    }
    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
        /*
         * The existing timestamps are needed to emulate UTIME_OMIT.  If they
         * cannot be read, fail here instead of reading an uninitialised st.
         */
        if (stat(path, &st) != 0) {
            return -1;
        }
    }

    for (i = 0; i < 2; i++) {
        if (times[i].tv_nsec == UTIME_NOW) {
            tv[i].tv_sec = tv_now.tv_sec;
            tv[i].tv_usec = tv_now.tv_usec;
        } else if (times[i].tv_nsec == UTIME_OMIT) {
            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
            tv[i].tv_usec = 0;
        } else {
            tv[i].tv_sec = times[i].tv_sec;
            tv[i].tv_usec = times[i].tv_nsec / 1000;
        }
    }

    return utimes(path, &tv[0]);
}
|
|
|
|
char *
|
|
qemu_get_local_state_pathname(const char *relative_pathname)
|
|
{
|
|
return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
|
|
relative_pathname);
|
|
}
|