diff --git a/configure b/configure index 4627d2c1eb..8c2f90b312 100755 --- a/configure +++ b/configure @@ -3800,8 +3800,8 @@ if compile_prog "" "" ; then epoll=yes fi -# epoll_create1 and epoll_pwait are later additions -# so we must check separately for their presence +# epoll_create1 is a later addition +# so we must check separately for its presence epoll_create1=no cat > $TMPC << EOF #include @@ -3823,20 +3823,6 @@ if compile_prog "" "" ; then epoll_create1=yes fi -epoll_pwait=no -cat > $TMPC << EOF -#include - -int main(void) -{ - epoll_pwait(0, 0, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - epoll_pwait=yes -fi - # check for sendfile support sendfile=no cat > $TMPC << EOF @@ -4528,6 +4514,19 @@ if compile_prog "" "" ; then have_fsxattr=yes fi +########################################## +# check if rtnetlink.h exists and is useful +have_rtnetlink=no +cat > $TMPC << EOF +#include +int main(void) { + return IFLA_PROTO_DOWN; +} +EOF +if compile_prog "" "" ; then + have_rtnetlink=yes +fi + ################################################# # Sparc implicitly links with --relax, which is # incompatible with -r, so --no-relax should be @@ -5135,9 +5134,6 @@ fi if test "$epoll_create1" = "yes" ; then echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak fi -if test "$epoll_pwait" = "yes" ; then - echo "CONFIG_EPOLL_PWAIT=y" >> $config_host_mak -fi if test "$sendfile" = "yes" ; then echo "CONFIG_SENDFILE=y" >> $config_host_mak fi @@ -5482,6 +5478,10 @@ if test "$rdma" = "yes" ; then echo "CONFIG_RDMA=y" >> $config_host_mak fi +if test "$have_rtnetlink" = "yes" ; then + echo "CONFIG_RTNETLINK=y" >> $config_host_mak +fi + # Hold two types of flag: # CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on # a thread we have a handle to diff --git a/gdbstub.c b/gdbstub.c index 9d3951336f..5da66f1794 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1493,19 +1493,6 @@ void gdb_exit(CPUArchState *env, int code) } #ifdef CONFIG_USER_ONLY -int 
-gdb_queuesig (void) -{ - GDBState *s; - - s = gdbserver_state; - - if (gdbserver_fd < 0 || s->fd < 0) - return 0; - else - return 1; -} - int gdb_handlesig(CPUState *cpu, int sig) { diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h index 8e3f8d8176..f9708bbcd6 100644 --- a/include/exec/gdbstub.h +++ b/include/exec/gdbstub.h @@ -48,7 +48,6 @@ int use_gdb_syscalls(void); void gdb_set_stop_cpu(CPUState *cpu); void gdb_exit(CPUArchState *, int); #ifdef CONFIG_USER_ONLY -int gdb_queuesig (void); int gdb_handlesig(CPUState *, int); void gdb_signalled(CPUArchState *, int); void gdbserver_fork(CPUState *); diff --git a/linux-user/host/x86_64/safe-syscall.inc.S b/linux-user/host/x86_64/safe-syscall.inc.S index dde434c8d7..e09368d450 100644 --- a/linux-user/host/x86_64/safe-syscall.inc.S +++ b/linux-user/host/x86_64/safe-syscall.inc.S @@ -24,6 +24,7 @@ * -1-and-errno-set convention is done by the calling wrapper. */ safe_syscall_base: + .cfi_startproc /* This saves a frame pointer and aligns the stack for the syscall. * (It's unclear if the syscall ABI has the same stack alignment * requirements as the userspace function call ABI, but better safe than @@ -31,6 +32,8 @@ safe_syscall_base: * does not list any ABI differences regarding stack alignment.) 
*/ push %rbp + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset rbp, 0 /* The syscall calling convention isn't the same as the * C one: @@ -70,12 +73,19 @@ safe_syscall_start: safe_syscall_end: /* code path for having successfully executed the syscall */ pop %rbp + .cfi_remember_state + .cfi_def_cfa_offset 8 + .cfi_restore rbp ret return_ERESTARTSYS: /* code path when we didn't execute the syscall */ + .cfi_restore_state mov $-TARGET_ERESTARTSYS, %rax pop %rbp + .cfi_def_cfa_offset 8 + .cfi_restore rbp ret + .cfi_endproc .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/main.c b/linux-user/main.c index 8a11d0219e..f8a8764ae9 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -131,7 +131,7 @@ void fork_end(int child) Discard information about the parent threads. */ CPU_FOREACH_SAFE(cpu, next_cpu) { if (cpu != thread_cpu) { - QTAILQ_REMOVE(&cpus, thread_cpu, node); + QTAILQ_REMOVE(&cpus, cpu, node); } } pending_cpus = 0; @@ -3795,14 +3795,7 @@ void stop_all_tasks(void) /* Assumes contents are already zeroed. 
*/ void init_task_state(TaskState *ts) { - int i; - ts->used = 1; - ts->first_free = ts->sigqueue_table; - for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) { - ts->sigqueue_table[i].next = &ts->sigqueue_table[i + 1]; - } - ts->sigqueue_table[i].next = NULL; } CPUArchState *cpu_copy(CPUArchState *env) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index f09b750bbf..56f29c35b5 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -78,16 +78,9 @@ struct vm86_saved_state { #define MAX_SIGQUEUE_SIZE 1024 -struct sigqueue { - struct sigqueue *next; - target_siginfo_t info; -}; - struct emulated_sigtable { int pending; /* true if signal is pending */ - struct sigqueue *first; - struct sigqueue info; /* in order to always have memory for the - first signal, we put it here */ + target_siginfo_t info; }; /* NOTE: we force a big alignment so that the stack stored after is @@ -123,14 +116,32 @@ typedef struct TaskState { #endif uint32_t stack_base; int used; /* non zero if used */ - bool sigsegv_blocked; /* SIGSEGV blocked by guest */ struct image_info *info; struct linux_binprm *bprm; + struct emulated_sigtable sync_signal; struct emulated_sigtable sigtab[TARGET_NSIG]; - struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ - struct sigqueue *first_free; /* first free siginfo queue entry */ - int signal_pending; /* non zero if a signal may be pending */ + /* This thread's signal mask, as requested by the guest program. + * The actual signal mask of this thread may differ: + * + we don't let SIGSEGV and SIGBUS be blocked while running guest code + * + sometimes we block all signals to avoid races + */ + sigset_t signal_mask; + /* The signal mask imposed by a guest sigsuspend syscall, if we are + * currently in the middle of such a syscall + */ + sigset_t sigsuspend_mask; + /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. 
*/ + int in_sigsuspend; + + /* Nonzero if process_pending_signals() needs to do something (either + * handle a pending signal or unblock signals). + * This flag is written from a signal handler so should be accessed via + * the atomic_read() and atomic_set() functions. (It is not accessed + * from multiple threads.) + */ + int signal_pending; + } __attribute__((aligned(16))) TaskState; extern char *exec_path; @@ -184,7 +195,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2); extern THREAD CPUState *thread_cpu; void cpu_loop(CPUArchState *env); -char *target_strerror(int err); +const char *target_strerror(int err); int get_osversion(void); void init_qemu_uname_release(void); void fork_start(void); @@ -235,6 +246,12 @@ unsigned long init_guest_space(unsigned long host_start, * It's also OK to implement these with safe_syscall, though it will be * a little less efficient if a signal is delivered at the 'wrong' moment. * + * Some non-interruptible syscalls need to be handled using block_signals() + * to block signals for the duration of the syscall. This mainly applies + * to code which needs to modify the data structures used by the + * host_signal_handler() function and the functions it calls, including + * all syscalls which change the thread's signal mask. + * * (2) Interruptible syscalls * * These are guest syscalls that can be interrupted by signals and @@ -266,6 +283,8 @@ unsigned long init_guest_space(unsigned long host_start, * you make in the implementation returns either -TARGET_ERESTARTSYS or * EINTR though.) * + * block_signals() cannot be used for interruptible syscalls. 
+ * * * How and why the safe_syscall implementation works: * @@ -352,6 +371,25 @@ long do_sigreturn(CPUArchState *env); long do_rt_sigreturn(CPUArchState *env); abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp); int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset); +/** + * block_signals: block all signals while handling this guest syscall + * + * Block all signals, and arrange that the signal mask is returned to + * its correct value for the guest before we resume execution of guest code. + * If this function returns non-zero, then the caller should immediately + * return -TARGET_ERESTARTSYS to the main loop, which will take the pending + * signal and restart execution of the syscall. + * If block_signals() returns zero, then the caller can continue with + * emulation of the system call knowing that no signals can be taken + * (and therefore that no race conditions will result). + * This should only be called once, because if it is called a second time + * it will always return non-zero. (Think of it like a mutex that can't + * be recursively locked.) + * Signals will be unblocked again by process_pending_signals(). + * + * Return value: non-zero if there was a pending signal, zero if not. + */ +int block_signals(void); /* Returns non zero if signal pending */ #ifdef TARGET_I386 /* vm86.c */ diff --git a/linux-user/signal.c b/linux-user/signal.c index 8090b4de1f..61c1145446 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -17,6 +17,7 @@ * along with this program; if not, see . 
*/ #include "qemu/osdep.h" +#include "qemu/bitops.h" #include #include @@ -190,125 +191,213 @@ void target_to_host_old_sigset(sigset_t *sigset, target_to_host_sigset(sigset, &d); } +int block_signals(void) +{ + TaskState *ts = (TaskState *)thread_cpu->opaque; + sigset_t set; + int pending; + + /* It's OK to block everything including SIGSEGV, because we won't + * run any further guest code before unblocking signals in + * process_pending_signals(). + */ + sigfillset(&set); + sigprocmask(SIG_SETMASK, &set, 0); + + pending = atomic_xchg(&ts->signal_pending, 1); + + return pending; +} + /* Wrapper for sigprocmask function * Emulates a sigprocmask in a safe way for the guest. Note that set and oldset - * are host signal set, not guest ones. This wraps the sigprocmask host calls - * that should be protected (calls originated from guest) + * are host signal set, not guest ones. Returns -TARGET_ERESTARTSYS if + * a signal was already pending and the syscall must be restarted, or + * 0 on success. + * If set is NULL, this is guaranteed not to fail. 
*/ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset) { - int ret; - sigset_t val; - sigset_t *temp = NULL; - CPUState *cpu = thread_cpu; - TaskState *ts = (TaskState *)cpu->opaque; - bool segv_was_blocked = ts->sigsegv_blocked; + TaskState *ts = (TaskState *)thread_cpu->opaque; + + if (oldset) { + *oldset = ts->signal_mask; + } if (set) { - bool has_sigsegv = sigismember(set, SIGSEGV); - val = *set; - temp = &val; + int i; - sigdelset(temp, SIGSEGV); + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } switch (how) { case SIG_BLOCK: - if (has_sigsegv) { - ts->sigsegv_blocked = true; - } + sigorset(&ts->signal_mask, &ts->signal_mask, set); break; case SIG_UNBLOCK: - if (has_sigsegv) { - ts->sigsegv_blocked = false; + for (i = 1; i <= NSIG; ++i) { + if (sigismember(set, i)) { + sigdelset(&ts->signal_mask, i); + } } break; case SIG_SETMASK: - ts->sigsegv_blocked = has_sigsegv; + ts->signal_mask = *set; break; default: g_assert_not_reached(); } + + /* Silently ignore attempts to change blocking status of KILL or STOP */ + sigdelset(&ts->signal_mask, SIGKILL); + sigdelset(&ts->signal_mask, SIGSTOP); } - - ret = sigprocmask(how, temp, oldset); - - if (oldset && segv_was_blocked) { - sigaddset(oldset, SIGSEGV); - } - - return ret; + return 0; } +#if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \ + !defined(TARGET_X86_64) +/* Just set the guest's signal mask to the specified value; the + * caller is assumed to have called block_signals() already. 
+ */ +static void set_sigmask(const sigset_t *set) +{ + TaskState *ts = (TaskState *)thread_cpu->opaque; + + ts->signal_mask = *set; +} +#endif + /* siginfo conversion */ static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo, const siginfo_t *info) { int sig = host_to_target_signal(info->si_signo); + int si_code = info->si_code; + int si_type; tinfo->si_signo = sig; tinfo->si_errno = 0; tinfo->si_code = info->si_code; - if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV - || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) { - /* Should never come here, but who knows. The information for - the target is irrelevant. */ - tinfo->_sifields._sigfault._addr = 0; - } else if (sig == TARGET_SIGIO) { - tinfo->_sifields._sigpoll._band = info->si_band; - tinfo->_sifields._sigpoll._fd = info->si_fd; - } else if (sig == TARGET_SIGCHLD) { - tinfo->_sifields._sigchld._pid = info->si_pid; - tinfo->_sifields._sigchld._uid = info->si_uid; - tinfo->_sifields._sigchld._status + /* This is awkward, because we have to use a combination of + * the si_code and si_signo to figure out which of the union's + * members are valid. (Within the host kernel it is always possible + * to tell, but the kernel carefully avoids giving userspace the + * high 16 bits of si_code, so we don't have the information to + * do this the easy way...) We therefore make our best guess, + * bearing in mind that a guest can spoof most of the si_codes + * via rt_sigqueueinfo() if it likes. + * + * Once we have made our guess, we record it in the top 16 bits of + * the si_code, so that tswap_siginfo() later can use it. + * tswap_siginfo() will strip these top bits out before writing + * si_code to the guest (sign-extending the lower bits). + */ + + switch (si_code) { + case SI_USER: + case SI_TKILL: + case SI_KERNEL: + /* Sent via kill(), tkill() or tgkill(), or direct from the kernel. + * These are the only unspoofable si_code values. 
+ */ + tinfo->_sifields._kill._pid = info->si_pid; + tinfo->_sifields._kill._uid = info->si_uid; + si_type = QEMU_SI_KILL; + break; + default: + /* Everything else is spoofable. Make best guess based on signal */ + switch (sig) { + case TARGET_SIGCHLD: + tinfo->_sifields._sigchld._pid = info->si_pid; + tinfo->_sifields._sigchld._uid = info->si_uid; + tinfo->_sifields._sigchld._status = host_to_target_waitstatus(info->si_status); - tinfo->_sifields._sigchld._utime = info->si_utime; - tinfo->_sifields._sigchld._stime = info->si_stime; - } else if (sig >= TARGET_SIGRTMIN) { - tinfo->_sifields._rt._pid = info->si_pid; - tinfo->_sifields._rt._uid = info->si_uid; - /* XXX: potential problem if 64 bit */ - tinfo->_sifields._rt._sigval.sival_ptr + tinfo->_sifields._sigchld._utime = info->si_utime; + tinfo->_sifields._sigchld._stime = info->si_stime; + si_type = QEMU_SI_CHLD; + break; + case TARGET_SIGIO: + tinfo->_sifields._sigpoll._band = info->si_band; + tinfo->_sifields._sigpoll._fd = info->si_fd; + si_type = QEMU_SI_POLL; + break; + default: + /* Assume a sigqueue()/mq_notify()/rt_sigqueueinfo() source. 
*/ + tinfo->_sifields._rt._pid = info->si_pid; + tinfo->_sifields._rt._uid = info->si_uid; + /* XXX: potential problem if 64 bit */ + tinfo->_sifields._rt._sigval.sival_ptr = (abi_ulong)(unsigned long)info->si_value.sival_ptr; + si_type = QEMU_SI_RT; + break; + } + break; } + + tinfo->si_code = deposit32(si_code, 16, 16, si_type); } static void tswap_siginfo(target_siginfo_t *tinfo, const target_siginfo_t *info) { - int sig = info->si_signo; - tinfo->si_signo = tswap32(sig); - tinfo->si_errno = tswap32(info->si_errno); - tinfo->si_code = tswap32(info->si_code); + int si_type = extract32(info->si_code, 16, 16); + int si_code = sextract32(info->si_code, 0, 16); - if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV - || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) { - tinfo->_sifields._sigfault._addr - = tswapal(info->_sifields._sigfault._addr); - } else if (sig == TARGET_SIGIO) { - tinfo->_sifields._sigpoll._band - = tswap32(info->_sifields._sigpoll._band); - tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd); - } else if (sig == TARGET_SIGCHLD) { - tinfo->_sifields._sigchld._pid - = tswap32(info->_sifields._sigchld._pid); - tinfo->_sifields._sigchld._uid - = tswap32(info->_sifields._sigchld._uid); - tinfo->_sifields._sigchld._status - = tswap32(info->_sifields._sigchld._status); - tinfo->_sifields._sigchld._utime - = tswapal(info->_sifields._sigchld._utime); - tinfo->_sifields._sigchld._stime - = tswapal(info->_sifields._sigchld._stime); - } else if (sig >= TARGET_SIGRTMIN) { - tinfo->_sifields._rt._pid = tswap32(info->_sifields._rt._pid); - tinfo->_sifields._rt._uid = tswap32(info->_sifields._rt._uid); - tinfo->_sifields._rt._sigval.sival_ptr - = tswapal(info->_sifields._rt._sigval.sival_ptr); + __put_user(info->si_signo, &tinfo->si_signo); + __put_user(info->si_errno, &tinfo->si_errno); + __put_user(si_code, &tinfo->si_code); + + /* We can use our internal marker of which fields in the structure + * are valid, rather than 
duplicating the guesswork of + * host_to_target_siginfo_noswap() here. + */ + switch (si_type) { + case QEMU_SI_KILL: + __put_user(info->_sifields._kill._pid, &tinfo->_sifields._kill._pid); + __put_user(info->_sifields._kill._uid, &tinfo->_sifields._kill._uid); + break; + case QEMU_SI_TIMER: + __put_user(info->_sifields._timer._timer1, + &tinfo->_sifields._timer._timer1); + __put_user(info->_sifields._timer._timer2, + &tinfo->_sifields._timer._timer2); + break; + case QEMU_SI_POLL: + __put_user(info->_sifields._sigpoll._band, + &tinfo->_sifields._sigpoll._band); + __put_user(info->_sifields._sigpoll._fd, + &tinfo->_sifields._sigpoll._fd); + break; + case QEMU_SI_FAULT: + __put_user(info->_sifields._sigfault._addr, + &tinfo->_sifields._sigfault._addr); + break; + case QEMU_SI_CHLD: + __put_user(info->_sifields._sigchld._pid, + &tinfo->_sifields._sigchld._pid); + __put_user(info->_sifields._sigchld._uid, + &tinfo->_sifields._sigchld._uid); + __put_user(info->_sifields._sigchld._status, + &tinfo->_sifields._sigchld._status); + __put_user(info->_sifields._sigchld._utime, + &tinfo->_sifields._sigchld._utime); + __put_user(info->_sifields._sigchld._stime, + &tinfo->_sifields._sigchld._stime); + break; + case QEMU_SI_RT: + __put_user(info->_sifields._rt._pid, &tinfo->_sifields._rt._pid); + __put_user(info->_sifields._rt._uid, &tinfo->_sifields._rt._uid); + __put_user(info->_sifields._rt._sigval.sival_ptr, + &tinfo->_sifields._rt._sigval.sival_ptr); + break; + default: + g_assert_not_reached(); } } - void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info) { host_to_target_siginfo_noswap(tinfo, info); @@ -319,13 +408,18 @@ void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info) /* XXX: find a solution for 64 bit (additional malloced data is needed) */ void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo) { - info->si_signo = tswap32(tinfo->si_signo); - info->si_errno = tswap32(tinfo->si_errno); - info->si_code = 
tswap32(tinfo->si_code); - info->si_pid = tswap32(tinfo->_sifields._rt._pid); - info->si_uid = tswap32(tinfo->_sifields._rt._uid); - info->si_value.sival_ptr = - (void *)(long)tswapal(tinfo->_sifields._rt._sigval.sival_ptr); + /* This conversion is used only for the rt_sigqueueinfo syscall, + * and so we know that the _rt fields are the valid ones. + */ + abi_ulong sival_ptr; + + __get_user(info->si_signo, &tinfo->si_signo); + __get_user(info->si_errno, &tinfo->si_errno); + __get_user(info->si_code, &tinfo->si_code); + __get_user(info->si_pid, &tinfo->_sifields._rt._pid); + __get_user(info->si_uid, &tinfo->_sifields._rt._uid); + __get_user(sival_ptr, &tinfo->_sifields._rt._sigval.sival_ptr); + info->si_value.sival_ptr = (void *)(long)sival_ptr; } static int fatal_signal (int sig) @@ -367,6 +461,7 @@ static int core_dump_signal(int sig) void signal_init(void) { + TaskState *ts = (TaskState *)thread_cpu->opaque; struct sigaction act; struct sigaction oact; int i, j; @@ -382,6 +477,9 @@ void signal_init(void) target_to_host_signal_table[j] = i; } + /* Set the signal mask from the host mask. */ + sigprocmask(0, 0, &ts->signal_mask); + /* set all host signal handlers. ALL signals are blocked during the handlers to serialize them. 
*/ memset(sigact_table, 0, sizeof(sigact_table)); @@ -408,27 +506,6 @@ void signal_init(void) } } -/* signal queue handling */ - -static inline struct sigqueue *alloc_sigqueue(CPUArchState *env) -{ - CPUState *cpu = ENV_GET_CPU(env); - TaskState *ts = cpu->opaque; - struct sigqueue *q = ts->first_free; - if (!q) - return NULL; - ts->first_free = q->next; - return q; -} - -static inline void free_sigqueue(CPUArchState *env, struct sigqueue *q) -{ - CPUState *cpu = ENV_GET_CPU(env); - TaskState *ts = cpu->opaque; - - q->next = ts->first_free; - ts->first_free = q; -} /* abort execution with signal */ static void QEMU_NORETURN force_sig(int target_sig) @@ -490,75 +567,21 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) { CPUState *cpu = ENV_GET_CPU(env); TaskState *ts = cpu->opaque; - struct emulated_sigtable *k; - struct sigqueue *q, **pq; - abi_ulong handler; - int queue; trace_user_queue_signal(env, sig); - k = &ts->sigtab[sig - 1]; - queue = gdb_queuesig (); - handler = sigact_table[sig - 1]._sa_handler; - if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) { - /* Guest has blocked SIGSEGV but we got one anyway. Assume this - * is a forced SIGSEGV (ie one the kernel handles via force_sig_info - * because it got a real MMU fault). A blocked SIGSEGV in that - * situation is treated as if using the default handler. This is - * not correct if some other process has randomly sent us a SIGSEGV - * via kill(), but that is not easy to distinguish at this point, - * so we assume it doesn't happen. - */ - handler = TARGET_SIG_DFL; - } + /* Currently all callers define siginfo structures which + * use the _sifields._sigfault union member, so we can + * set the type here. If that changes we should push this + * out so the si_type is passed in by callers. 
+ */ + info->si_code = deposit32(info->si_code, 16, 16, QEMU_SI_FAULT); - if (!queue && handler == TARGET_SIG_DFL) { - if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) { - kill(getpid(),SIGSTOP); - return 0; - } else - /* default handler : ignore some signal. The other are fatal */ - if (sig != TARGET_SIGCHLD && - sig != TARGET_SIGURG && - sig != TARGET_SIGWINCH && - sig != TARGET_SIGCONT) { - force_sig(sig); - } else { - return 0; /* indicate ignored */ - } - } else if (!queue && handler == TARGET_SIG_IGN) { - /* ignore signal */ - return 0; - } else if (!queue && handler == TARGET_SIG_ERR) { - force_sig(sig); - } else { - pq = &k->first; - if (sig < TARGET_SIGRTMIN) { - /* if non real time signal, we queue exactly one signal */ - if (!k->pending) - q = &k->info; - else - return 0; - } else { - if (!k->pending) { - /* first signal */ - q = &k->info; - } else { - q = alloc_sigqueue(env); - if (!q) - return -EAGAIN; - while (*pq != NULL) - pq = &(*pq)->next; - } - } - *pq = q; - q->info = *info; - q->next = NULL; - k->pending = 1; - /* signal that a new signal is pending */ - ts->signal_pending = 1; - return 1; /* indicates that the signal was queued */ - } + ts->sync_signal.info = *info; + ts->sync_signal.pending = sig; + /* signal that a new signal is pending */ + atomic_set(&ts->signal_pending, 1); + return 1; /* indicates that the signal was queued */ } #ifndef HAVE_SAFE_SYSCALL @@ -572,8 +595,13 @@ static void host_signal_handler(int host_signum, siginfo_t *info, void *puc) { CPUArchState *env = thread_cpu->env_ptr; + CPUState *cpu = ENV_GET_CPU(env); + TaskState *ts = cpu->opaque; + int sig; target_siginfo_t tinfo; + ucontext_t *uc = puc; + struct emulated_sigtable *k; /* the CPU emulator uses some host signals to detect exceptions, we forward to it some signals */ @@ -592,10 +620,23 @@ static void host_signal_handler(int host_signum, siginfo_t *info, rewind_if_in_safe_syscall(puc); host_to_target_siginfo_noswap(&tinfo, info); - if 
(queue_signal(env, sig, &tinfo) == 1) { - /* interrupt the virtual CPU as soon as possible */ - cpu_exit(thread_cpu); - } + k = &ts->sigtab[sig - 1]; + k->info = tinfo; + k->pending = sig; + ts->signal_pending = 1; + + /* Block host signals until target signal handler entered. We + * can't block SIGSEGV or SIGBUS while we're executing guest + * code in case the guest code provokes one in the window between + * now and it getting out to the main loop. Signals will be + * unblocked again in process_pending_signals(). + */ + sigfillset(&uc->uc_sigmask); + sigdelset(&uc->uc_sigmask, SIGSEGV); + sigdelset(&uc->uc_sigmask, SIGBUS); + + /* interrupt the virtual CPU as soon as possible */ + cpu_exit(thread_cpu); } /* do_sigaltstack() returns target values and errnos. */ @@ -671,7 +712,7 @@ out: return ret; } -/* do_sigaction() return host values and errnos */ +/* do_sigaction() return target values and host errnos */ int do_sigaction(int sig, const struct target_sigaction *act, struct target_sigaction *oact) { @@ -680,8 +721,14 @@ int do_sigaction(int sig, const struct target_sigaction *act, int host_sig; int ret = 0; - if (sig < 1 || sig > TARGET_NSIG || sig == TARGET_SIGKILL || sig == TARGET_SIGSTOP) - return -EINVAL; + if (sig < 1 || sig > TARGET_NSIG || sig == TARGET_SIGKILL || sig == TARGET_SIGSTOP) { + return -TARGET_EINVAL; + } + + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } + k = &sigact_table[sig - 1]; if (oact) { __put_user(k->_sa_handler, &oact->_sa_handler); @@ -1093,7 +1140,7 @@ long do_sigreturn(CPUX86State *env) } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); /* restore registers */ if (restore_sigcontext(env, &frame->sc)) @@ -1118,7 +1165,7 @@ long do_rt_sigreturn(CPUX86State *env) if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); if 
(restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; @@ -1258,7 +1305,7 @@ static int target_restore_sigframe(CPUARMState *env, uint64_t pstate; target_to_host_sigset(&set, &sf->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); for (i = 0; i < 31; i++) { __get_user(env->xregs[i], &sf->uc.tuc_mcontext.regs[i]); @@ -1900,7 +1947,7 @@ static long do_sigreturn_v1(CPUARMState *env) } target_to_host_sigset_internal(&host_set, &set); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (restore_sigcontext(env, &frame->sc)) { goto badframe; @@ -1981,7 +2028,7 @@ static int do_sigframe_return_v2(CPUARMState *env, target_ulong frame_addr, abi_ulong *regspace; target_to_host_sigset(&host_set, &uc->tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (restore_sigcontext(env, &uc->tuc_mcontext)) return 1; @@ -2077,7 +2124,7 @@ static long do_rt_sigreturn_v1(CPUARMState *env) } target_to_host_sigset(&host_set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; @@ -2453,7 +2500,7 @@ long do_sigreturn(CPUSPARCState *env) } target_to_host_sigset_internal(&host_set, &set); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (err) { goto segv_and_exit; @@ -2576,7 +2623,7 @@ void sparc64_set_context(CPUSPARCState *env) } } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); } env->pc = pc; env->npc = npc; @@ -2664,9 +2711,13 @@ void sparc64_get_context(CPUSPARCState *env) env->pc = env->npc; env->npc += 4; - err = 0; - - do_sigprocmask(0, NULL, &set); + /* If we're only reading the signal mask then do_sigprocmask() + * is guaranteed not to fail, which is important because we don't + * have any way to signal a failure or restart this operation since + * this is not a normal 
syscall. + */ + err = do_sigprocmask(0, NULL, &set); + assert(err == 0); host_to_target_sigset_internal(&target_set, &set); if (TARGET_NSIG_WORDS == 1) { __put_user(target_set.sig[0], @@ -2993,7 +3044,7 @@ long do_sigreturn(CPUMIPSState *regs) } target_to_host_sigset_internal(&blocked, &target_set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(regs, &frame->sf_sc); @@ -3097,7 +3148,7 @@ long do_rt_sigreturn(CPUMIPSState *env) } target_to_host_sigset(&blocked, &frame->rs_uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(env, &frame->rs_uc.tuc_mcontext); @@ -3371,7 +3422,7 @@ long do_sigreturn(CPUSH4State *regs) goto badframe; target_to_host_sigset_internal(&blocked, &target_set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(regs, &frame->sc); @@ -3397,7 +3448,7 @@ long do_rt_sigreturn(CPUSH4State *regs) } target_to_host_sigset(&blocked, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(regs, &frame->uc.tuc_mcontext); @@ -3621,7 +3672,7 @@ long do_sigreturn(CPUMBState *env) __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(&frame->uc.tuc_mcontext, env); /* We got here through a sigreturn syscall, our path back is via an @@ -3792,7 +3843,7 @@ long do_sigreturn(CPUCRISState *env) __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(&frame->sc, env); unlock_user_struct(frame, frame_addr, 0); @@ -4284,7 +4335,7 @@ long do_sigreturn(CPUS390XState *env) __get_user(target_set.sig[0], &frame->sc.oldmask[0]); target_to_host_sigset_internal(&set, &target_set); - 
do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? */ + set_sigmask(&set); /* ~_BLOCKABLE? */ if (restore_sigregs(env, &frame->sregs)) { goto badframe; @@ -4310,7 +4361,7 @@ long do_rt_sigreturn(CPUS390XState *env) } target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? */ + set_sigmask(&set); /* ~_BLOCKABLE? */ if (restore_sigregs(env, &frame->uc.tuc_mcontext)) { goto badframe; @@ -4872,7 +4923,7 @@ long do_sigreturn(CPUPPCState *env) __get_user(set.sig[1], &sc->_unused[3]); #endif target_to_host_sigset_internal(&blocked, &set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); __get_user(sr_addr, &sc->regs); if (!lock_user_struct(VERIFY_READ, sr, sr_addr, 1)) @@ -4913,7 +4964,7 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig) return 1; target_to_host_sigset_internal(&blocked, &set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_user_regs(env, mcp, sig); unlock_user_struct(mcp, mcp_addr, 1); @@ -5261,7 +5312,7 @@ long do_sigreturn(CPUM68KState *env) } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); /* restore registers */ @@ -5287,7 +5338,7 @@ long do_rt_sigreturn(CPUM68KState *env) goto badframe; target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); /* restore registers */ @@ -5530,7 +5581,7 @@ long do_sigreturn(CPUAlphaState *env) __get_user(target_set.sig[0], &sc->sc_mask); target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(env, sc); unlock_user_struct(sc, sc_addr, 0); @@ -5551,7 +5602,7 @@ long do_rt_sigreturn(CPUAlphaState *env) goto badframe; } target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(env, 
&frame->uc.tuc_mcontext); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, @@ -5718,7 +5769,7 @@ long do_rt_sigreturn(CPUTLGState *env) goto badframe; } target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(env, &frame->uc.tuc_mcontext); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, @@ -5765,39 +5816,19 @@ long do_rt_sigreturn(CPUArchState *env) #endif -void process_pending_signals(CPUArchState *cpu_env) +static void handle_pending_signal(CPUArchState *cpu_env, int sig) { CPUState *cpu = ENV_GET_CPU(cpu_env); - int sig; abi_ulong handler; - sigset_t set, old_set; + sigset_t set; target_sigset_t target_old_set; - struct emulated_sigtable *k; struct target_sigaction *sa; - struct sigqueue *q; TaskState *ts = cpu->opaque; + struct emulated_sigtable *k = &ts->sigtab[sig - 1]; - if (!ts->signal_pending) - return; - - /* FIXME: This is not threadsafe. */ - k = ts->sigtab; - for(sig = 1; sig <= TARGET_NSIG; sig++) { - if (k->pending) - goto handle_signal; - k++; - } - /* if no signal is pending, just return */ - ts->signal_pending = 0; - return; - - handle_signal: trace_user_handle_signal(cpu_env, sig); /* dequeue signal */ - q = k->first; - k->first = q->next; - if (!k->first) - k->pending = 0; + k->pending = 0; sig = gdb_handlesig(cpu, sig); if (!sig) { @@ -5808,14 +5839,6 @@ void process_pending_signals(CPUArchState *cpu_env) handler = sa->_sa_handler; } - if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) { - /* Guest has blocked SIGSEGV but we got one anyway. Assume this - * is a forced SIGSEGV (ie one the kernel handles via force_sig_info - * because it got a real MMU fault), and treat as if default handler. - */ - handler = TARGET_SIG_DFL; - } - if (handler == TARGET_SIG_DFL) { /* default handler : ignore some signal. 
The other are job control or fatal */ if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) { @@ -5832,17 +5855,23 @@ void process_pending_signals(CPUArchState *cpu_env) force_sig(sig); } else { /* compute the blocked signals during the handler execution */ + sigset_t *blocked_set; + target_to_host_sigset(&set, &sa->sa_mask); /* SA_NODEFER indicates that the current signal should not be blocked during the handler */ if (!(sa->sa_flags & TARGET_SA_NODEFER)) sigaddset(&set, target_to_host_signal(sig)); - /* block signals in the handler using Linux */ - do_sigprocmask(SIG_BLOCK, &set, &old_set); /* save the previous blocked signal state to restore it at the end of the signal execution (see do_sigreturn) */ - host_to_target_sigset_internal(&target_old_set, &old_set); + host_to_target_sigset_internal(&target_old_set, &ts->signal_mask); + + /* block signals in the handler */ + blocked_set = ts->in_sigsuspend ? + &ts->sigsuspend_mask : &ts->signal_mask; + sigorset(&ts->signal_mask, blocked_set, &set); + ts->in_sigsuspend = 0; /* if the CPU is in VM86 mode, we restore the 32 bit values */ #if defined(TARGET_I386) && !defined(TARGET_X86_64) @@ -5856,16 +5885,74 @@ void process_pending_signals(CPUArchState *cpu_env) #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \ || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) /* These targets do not have traditional signals. 
*/ - setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env); + setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); #else if (sa->sa_flags & TARGET_SA_SIGINFO) - setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env); + setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); else setup_frame(sig, sa, &target_old_set, cpu_env); #endif - if (sa->sa_flags & TARGET_SA_RESETHAND) + if (sa->sa_flags & TARGET_SA_RESETHAND) { sa->_sa_handler = TARGET_SIG_DFL; + } } - if (q != &k->info) - free_sigqueue(cpu_env, q); +} + +void process_pending_signals(CPUArchState *cpu_env) +{ + CPUState *cpu = ENV_GET_CPU(cpu_env); + int sig; + TaskState *ts = cpu->opaque; + sigset_t set; + sigset_t *blocked_set; + + while (atomic_read(&ts->signal_pending)) { + /* FIXME: This is not threadsafe. */ + sigfillset(&set); + sigprocmask(SIG_SETMASK, &set, 0); + + sig = ts->sync_signal.pending; + if (sig) { + /* Synchronous signals are forced, + * see force_sig_info() and callers in Linux + * Note that not all of our queue_signal() calls in QEMU correspond + * to force_sig_info() calls in Linux (some are send_sig_info()). + * However it seems like a kernel bug to me to allow the process + * to block a synchronous signal since it could then just end up + * looping round and round indefinitely. + */ + if (sigismember(&ts->signal_mask, target_to_host_signal_table[sig]) + || sigact_table[sig - 1]._sa_handler == TARGET_SIG_IGN) { + sigdelset(&ts->signal_mask, target_to_host_signal_table[sig]); + sigact_table[sig - 1]._sa_handler = TARGET_SIG_DFL; + } + + handle_pending_signal(cpu_env, sig); + } + + for (sig = 1; sig <= TARGET_NSIG; sig++) { + blocked_set = ts->in_sigsuspend ? 
+ &ts->sigsuspend_mask : &ts->signal_mask; + + if (ts->sigtab[sig - 1].pending && + (!sigismember(blocked_set, + target_to_host_signal_table[sig]))) { + handle_pending_signal(cpu_env, sig); + /* Restart scan from signal 1 (sig++ runs next) */ + sig = 0; + } + } + + /* if no signal is pending, unblock signals and recheck (the act + * of unblocking might cause us to take another host signal which + * will set signal_pending again). + */ + atomic_set(&ts->signal_pending, 0); + ts->in_sigsuspend = 0; + set = ts->signal_mask; + sigdelset(&set, SIGSEGV); + sigdelset(&set, SIGBUS); + sigprocmask(SIG_SETMASK, &set, 0); + } + ts->in_sigsuspend = 0; } diff --git a/linux-user/strace.c b/linux-user/strace.c index 0810c85fbd..c5980a128c 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -281,7 +281,7 @@ print_ipc(const struct syscallname *name, static void print_syscall_ret_addr(const struct syscallname *name, abi_long ret) { - char *errstr = NULL; + const char *errstr = NULL; if (ret < 0) { errstr = target_strerror(-ret); @@ -1594,7 +1594,7 @@ void print_syscall_ret(int num, abi_long ret) { int i; - char *errstr = NULL; + const char *errstr = NULL; for(i=0;i #include #include +#include +#ifdef CONFIG_RTNETLINK +#include +#endif +#include #include "linux_loop.h" #include "uname.h" @@ -119,6 +124,10 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct linux_dirent [2]) #define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct linux_dirent [2]) +/* This is the size of the host kernel's sigset_t, needed where we make + * direct system calls that take a sigset_t pointer and a size.
+ */ +#define SIGSET_T_SIZE (_NSIG / 8) #undef _syscall0 #undef _syscall1 @@ -182,8 +191,6 @@ static type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, \ #define __NR_sys_getpriority __NR_getpriority #define __NR_sys_rt_sigqueueinfo __NR_rt_sigqueueinfo #define __NR_sys_syslog __NR_syslog -#define __NR_sys_tgkill __NR_tgkill -#define __NR_sys_tkill __NR_tkill #define __NR_sys_futex __NR_futex #define __NR_sys_inotify_init __NR_inotify_init #define __NR_sys_inotify_add_watch __NR_inotify_add_watch @@ -221,12 +228,6 @@ _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, #endif _syscall3(int,sys_rt_sigqueueinfo,int,pid,int,sig,siginfo_t *,uinfo) _syscall3(int,sys_syslog,int,type,char*,bufp,int,len) -#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill) -_syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig) -#endif -#if defined(TARGET_NR_tkill) && defined(__NR_tkill) -_syscall2(int,sys_tkill,int,tid,int,sig) -#endif #ifdef __NR_exit_group _syscall1(int,exit_group,int,error_code) #endif @@ -304,6 +305,14 @@ static TargetFdTrans **target_fd_trans; static unsigned int target_fd_max; +static TargetFdDataFunc fd_trans_target_to_host_data(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->target_to_host_data; + } + return NULL; +} + static TargetFdDataFunc fd_trans_host_to_target_data(int fd) { if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { @@ -420,16 +429,6 @@ static int sys_inotify_init1(int flags) #undef TARGET_NR_inotify_rm_watch #endif /* CONFIG_INOTIFY */ -#if defined(TARGET_NR_ppoll) -#ifndef __NR_ppoll -# define __NR_ppoll -1 -#endif -#define __NR_sys_ppoll __NR_ppoll -_syscall5(int, sys_ppoll, struct pollfd *, fds, nfds_t, nfds, - struct timespec *, timeout, const sigset_t *, sigmask, - size_t, sigsetsize) -#endif - #if defined(TARGET_NR_prlimit64) #ifndef __NR_prlimit64 # define __NR_prlimit64 -1 @@ -631,8 +630,15 @@ static inline int is_error(abi_long ret) return (abi_ulong)ret >= 
(abi_ulong)(-4096); } -char *target_strerror(int err) +const char *target_strerror(int err) { + if (err == TARGET_ERESTARTSYS) { + return "To be restarted"; + } + if (err == TARGET_QEMU_ESIGRETURN) { + return "Successful exit from sigreturn"; + } + if ((err >= ERRNO_TABLE_SIZE) || (err < 0)) { return NULL; } @@ -697,8 +703,87 @@ safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp) safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ fd_set *, exceptfds, struct timespec *, timeout, void *, sig) +safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds, + struct timespec *, tsp, const sigset_t *, sigmask, + size_t, sigsetsize) +safe_syscall6(int, epoll_pwait, int, epfd, struct epoll_event *, events, + int, maxevents, int, timeout, const sigset_t *, sigmask, + size_t, sigsetsize) safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ const struct timespec *,timeout,int *,uaddr2,int,val3) +safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize) +safe_syscall2(int, kill, pid_t, pid, int, sig) +safe_syscall2(int, tkill, int, tid, int, sig) +safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig) +safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt) +safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt) +safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr, + socklen_t, addrlen) +safe_syscall6(ssize_t, sendto, int, fd, const void *, buf, size_t, len, + int, flags, const struct sockaddr *, addr, socklen_t, addrlen) +safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len, + int, flags, struct sockaddr *, addr, socklen_t *, addrlen) +safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags) +safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) +safe_syscall2(int, flock, int, fd, int, operation) 
+safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo, + const struct timespec *, uts, size_t, sigsetsize) +safe_syscall4(int, accept4, int, fd, struct sockaddr *, addr, socklen_t *, len, + int, flags) +safe_syscall2(int, nanosleep, const struct timespec *, req, + struct timespec *, rem) +#ifdef TARGET_NR_clock_nanosleep +safe_syscall4(int, clock_nanosleep, const clockid_t, clock, int, flags, + const struct timespec *, req, struct timespec *, rem) +#endif +#ifdef __NR_msgsnd +safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz, + int, flags) +safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz, + long, msgtype, int, flags) +safe_syscall4(int, semtimedop, int, semid, struct sembuf *, tsops, + unsigned, nsops, const struct timespec *, timeout) +#else +/* This host kernel architecture uses a single ipc syscall; fake up + * wrappers for the sub-operations to hide this implementation detail. + * Annoyingly we can't include linux/ipc.h to get the constant definitions + * for the call parameter because some structs in there conflict with the + * sys/ipc.h ones. So we just define them here, and rely on them being + * the same for all host architectures. 
+ */ +#define Q_SEMTIMEDOP 4 +#define Q_MSGSND 11 +#define Q_MSGRCV 12 +#define Q_IPCCALL(VERSION, OP) ((VERSION) << 16 | (OP)) + +safe_syscall6(int, ipc, int, call, long, first, long, second, long, third, + void *, ptr, long, fifth) +static int safe_msgsnd(int msgid, const void *msgp, size_t sz, int flags) +{ + return safe_ipc(Q_IPCCALL(0, Q_MSGSND), msgid, sz, flags, (void *)msgp, 0); +} +static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags) +{ + return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type); +} +static int safe_semtimedop(int semid, struct sembuf *tsops, unsigned nsops, + const struct timespec *timeout) +{ + return safe_ipc(Q_IPCCALL(0, Q_SEMTIMEDOP), semid, nsops, 0, tsops, + (long)timeout); +} +#endif +#if defined(TARGET_NR_mq_open) && defined(__NR_mq_open) +safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr, + size_t, len, unsigned, prio, const struct timespec *, timeout) +safe_syscall5(int, mq_timedreceive, int, mqdes, char *, msg_ptr, + size_t, len, unsigned *, prio, const struct timespec *, timeout) +#endif +/* We do ioctl like this rather than via safe_syscall3 to preserve the + * "third argument might be integer or pointer or not present" behaviour of + * the libc function. + */ +#define safe_ioctl(...) 
safe_syscall(__NR_ioctl, __VA_ARGS__) static inline int host_to_target_sock_type(int host_type) { @@ -1261,7 +1346,13 @@ static inline abi_long target_to_host_sockaddr(int fd, struct sockaddr *addr, memcpy(addr, target_saddr, len); addr->sa_family = sa_family; - if (sa_family == AF_PACKET) { + if (sa_family == AF_NETLINK) { + struct sockaddr_nl *nladdr; + + nladdr = (struct sockaddr_nl *)addr; + nladdr->nl_pid = tswap32(nladdr->nl_pid); + nladdr->nl_groups = tswap32(nladdr->nl_groups); + } else if (sa_family == AF_PACKET) { struct target_sockaddr_ll *lladdr; lladdr = (struct target_sockaddr_ll *)addr; @@ -1284,6 +1375,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr, return -TARGET_EFAULT; memcpy(target_saddr, addr, len); target_saddr->sa_family = tswap16(addr->sa_family); + if (addr->sa_family == AF_NETLINK) { + struct sockaddr_nl *target_nl = (struct sockaddr_nl *)target_saddr; + target_nl->nl_pid = tswap32(target_nl->nl_pid); + target_nl->nl_groups = tswap32(target_nl->nl_groups); + } unlock_user(target_saddr, target_addr, len); return 0; @@ -1515,6 +1611,551 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, return 0; } +static void tswap_nlmsghdr(struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = tswap32(nlh->nlmsg_len); + nlh->nlmsg_type = tswap16(nlh->nlmsg_type); + nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags); + nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq); + nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid); +} + +static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*host_to_target_nlmsg) + (struct nlmsghdr *)) +{ + uint32_t nlmsg_len; + abi_long ret; + + while (len > sizeof(struct nlmsghdr)) { + + nlmsg_len = nlh->nlmsg_len; + if (nlmsg_len < sizeof(struct nlmsghdr) || + nlmsg_len > len) { + break; + } + + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + tswap_nlmsghdr(nlh); + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + 
e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + tswap_nlmsghdr(nlh); + return 0; + } + default: + ret = host_to_target_nlmsg(nlh); + if (ret < 0) { + tswap_nlmsghdr(nlh); + return ret; + } + break; + } + tswap_nlmsghdr(nlh); + len -= NLMSG_ALIGN(nlmsg_len); + nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len)); + } + return 0; +} + +static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*target_to_host_nlmsg) + (struct nlmsghdr *)) +{ + int ret; + + while (len > sizeof(struct nlmsghdr)) { + if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) || + tswap32(nlh->nlmsg_len) > len) { + break; + } + tswap_nlmsghdr(nlh); + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + } + default: + ret = target_to_host_nlmsg(nlh); + if (ret < 0) { + return ret; + } + } + len -= NLMSG_ALIGN(nlh->nlmsg_len); + nlh = (struct nlmsghdr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); + } + return 0; +} + +#ifdef CONFIG_RTNETLINK +static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*host_to_target_rtattr) + (struct rtattr *)) +{ + unsigned short rta_len; + abi_long ret; + + while (len > sizeof(struct rtattr)) { + rta_len = rtattr->rta_len; + if (rta_len < sizeof(struct rtattr) || + rta_len > len) { + break; + } + ret = host_to_target_rtattr(rtattr); + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len)); + } + return 0; +} + +static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct rtnl_link_stats *st; + struct rtnl_link_stats64 *st64; + struct rtnl_link_ifmap *map; + + switch (rtattr->rta_type) { + /* binary 
stream */ + case IFLA_ADDRESS: + case IFLA_BROADCAST: + /* string */ + case IFLA_IFNAME: + case IFLA_QDISC: + break; + /* uint8_t */ + case IFLA_OPERSTATE: + case IFLA_LINKMODE: + case IFLA_CARRIER: + case IFLA_PROTO_DOWN: + break; + /* uint32_t */ + case IFLA_MTU: + case IFLA_LINK: + case IFLA_WEIGHT: + case IFLA_TXQLEN: + case IFLA_CARRIER_CHANGES: + case IFLA_NUM_RX_QUEUES: + case IFLA_NUM_TX_QUEUES: + case IFLA_PROMISCUITY: + case IFLA_EXT_MASK: + case IFLA_LINK_NETNSID: + case IFLA_GROUP: + case IFLA_MASTER: + case IFLA_NUM_VF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct rtnl_link_stats */ + case IFLA_STATS: + st = RTA_DATA(rtattr); + st->rx_packets = tswap32(st->rx_packets); + st->tx_packets = tswap32(st->tx_packets); + st->rx_bytes = tswap32(st->rx_bytes); + st->tx_bytes = tswap32(st->tx_bytes); + st->rx_errors = tswap32(st->rx_errors); + st->tx_errors = tswap32(st->tx_errors); + st->rx_dropped = tswap32(st->rx_dropped); + st->tx_dropped = tswap32(st->tx_dropped); + st->multicast = tswap32(st->multicast); + st->collisions = tswap32(st->collisions); + + /* detailed rx_errors: */ + st->rx_length_errors = tswap32(st->rx_length_errors); + st->rx_over_errors = tswap32(st->rx_over_errors); + st->rx_crc_errors = tswap32(st->rx_crc_errors); + st->rx_frame_errors = tswap32(st->rx_frame_errors); + st->rx_fifo_errors = tswap32(st->rx_fifo_errors); + st->rx_missed_errors = tswap32(st->rx_missed_errors); + + /* detailed tx_errors */ + st->tx_aborted_errors = tswap32(st->tx_aborted_errors); + st->tx_carrier_errors = tswap32(st->tx_carrier_errors); + st->tx_fifo_errors = tswap32(st->tx_fifo_errors); + st->tx_heartbeat_errors = tswap32(st->tx_heartbeat_errors); + st->tx_window_errors = tswap32(st->tx_window_errors); + + /* for cslip etc */ + st->rx_compressed = tswap32(st->rx_compressed); + st->tx_compressed = tswap32(st->tx_compressed); + break; + /* struct rtnl_link_stats64 */ + case IFLA_STATS64: + st64 = RTA_DATA(rtattr); + st64->rx_packets =
tswap64(st64->rx_packets); + st64->tx_packets = tswap64(st64->tx_packets); + st64->rx_bytes = tswap64(st64->rx_bytes); + st64->tx_bytes = tswap64(st64->tx_bytes); + st64->rx_errors = tswap64(st64->rx_errors); + st64->tx_errors = tswap64(st64->tx_errors); + st64->rx_dropped = tswap64(st64->rx_dropped); + st64->tx_dropped = tswap64(st64->tx_dropped); + st64->multicast = tswap64(st64->multicast); + st64->collisions = tswap64(st64->collisions); + + /* detailed rx_errors: */ + st64->rx_length_errors = tswap64(st64->rx_length_errors); + st64->rx_over_errors = tswap64(st64->rx_over_errors); + st64->rx_crc_errors = tswap64(st64->rx_crc_errors); + st64->rx_frame_errors = tswap64(st64->rx_frame_errors); + st64->rx_fifo_errors = tswap64(st64->rx_fifo_errors); + st64->rx_missed_errors = tswap64(st64->rx_missed_errors); + + /* detailed tx_errors */ + st64->tx_aborted_errors = tswap64(st64->tx_aborted_errors); + st64->tx_carrier_errors = tswap64(st64->tx_carrier_errors); + st64->tx_fifo_errors = tswap64(st64->tx_fifo_errors); + st64->tx_heartbeat_errors = tswap64(st64->tx_heartbeat_errors); + st64->tx_window_errors = tswap64(st64->tx_window_errors); + + /* for cslip etc */ + st64->rx_compressed = tswap64(st64->rx_compressed); + st64->tx_compressed = tswap64(st64->tx_compressed); + break; + /* struct rtnl_link_ifmap */ + case IFLA_MAP: + map = RTA_DATA(rtattr); + map->mem_start = tswap64(map->mem_start); + map->mem_end = tswap64(map->mem_end); + map->base_addr = tswap64(map->base_addr); + map->irq = tswap16(map->irq); + break; + /* nested */ + case IFLA_AF_SPEC: + case IFLA_LINKINFO: + /* FIXME: implement nested type */ + gemu_log("Unimplemented nested type %d\n", rtattr->rta_type); + break; + default: + gemu_log("Unknown host IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct ifa_cacheinfo *ci; + + switch (rtattr->rta_type) { + /* binary: depends on family type */ 
+ case IFA_ADDRESS: + case IFA_LOCAL: + break; + /* string */ + case IFA_LABEL: + break; + /* u32 */ + case IFA_FLAGS: + case IFA_BROADCAST: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct ifa_cacheinfo */ + case IFA_CACHEINFO: + ci = RTA_DATA(rtattr); + ci->ifa_prefered = tswap32(ci->ifa_prefered); + ci->ifa_valid = tswap32(ci->ifa_valid); + ci->cstamp = tswap32(ci->cstamp); + ci->tstamp = tswap32(ci->tstamp); + break; + default: + gemu_log("Unknown host IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case RTA_GATEWAY: + case RTA_DST: + case RTA_PREFSRC: + break; + /* u32 */ + case RTA_PRIORITY: + case RTA_TABLE: + case RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_link_rtattr); +} + +static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_addr_rtattr); +} + +static abi_long host_to_target_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_route_rtattr); +} + +static abi_long host_to_target_data_route(struct nlmsghdr *nlh) +{ + uint32_t nlmsg_len; + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + nlmsg_len = nlh->nlmsg_len; + switch (nlh->nlmsg_type) { + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_GETLINK: + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = 
tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + host_to_target_link_rtattr(IFLA_RTA(ifi), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_GETADDR: + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + host_to_target_addr_rtattr(IFA_RTA(ifa), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_GETROUTE: + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + host_to_target_route_rtattr(RTM_RTA(rtm), + nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); + break; + default: + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_route); +} + +static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*target_to_host_rtattr) + (struct rtattr *)) +{ + abi_long ret; + + while (len >= sizeof(struct rtattr)) { + if (tswap16(rtattr->rta_len) < sizeof(struct rtattr) || + tswap16(rtattr->rta_len) > len) { + break; + } + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + ret = target_to_host_rtattr(rtattr); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rtattr->rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + + RTA_ALIGN(rtattr->rta_len)); + } + return 0; +} + +static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + default: + gemu_log("Unknown target IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_LOCAL: + case IFA_ADDRESS: + break; + default: + gemu_log("Unknown target IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long 
target_to_host_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case RTA_DST: + case RTA_SRC: + case RTA_GATEWAY: + break; + /* u32 */ + case RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown target RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static void target_to_host_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_link_rtattr); +} + +static void target_to_host_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_addr_rtattr); +} + +static void target_to_host_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_route_rtattr); +} + +static abi_long target_to_host_data_route(struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + switch (nlh->nlmsg_type) { + case RTM_GETLINK: + break; + case RTM_NEWLINK: + case RTM_DELLINK: + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_GETADDR: + case RTM_NEWADDR: + case RTM_DELADDR: + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifa))); + break; + case RTM_GETROUTE: + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*rtm))); + break; + default: + return -TARGET_EOPNOTSUPP; 
+ } + return 0; +} + +static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); +} +#endif /* CONFIG_RTNETLINK */ + +static abi_long host_to_target_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + default: + gemu_log("Unknown host audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_audit(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_audit); +} + +static abi_long target_to_host_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + case AUDIT_USER: + case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: + case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: + break; + default: + gemu_log("Unknown target audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + + return 0; +} + +static abi_long target_to_host_nlmsg_audit(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_audit); +} + /* do_setsockopt() Must return target values and target errnos. 
*/ static abi_long do_setsockopt(int sockfd, int level, int optname, abi_ulong optval_addr, socklen_t optlen) @@ -2165,6 +2806,38 @@ static TargetFdTrans target_packet_trans = { .target_to_host_addr = packet_target_to_host_sockaddr, }; +#ifdef CONFIG_RTNETLINK +static abi_long netlink_route_target_to_host(void *buf, size_t len) +{ + return target_to_host_nlmsg_route(buf, len); +} + +static abi_long netlink_route_host_to_target(void *buf, size_t len) +{ + return host_to_target_nlmsg_route(buf, len); +} + +static TargetFdTrans target_netlink_route_trans = { + .target_to_host_data = netlink_route_target_to_host, + .host_to_target_data = netlink_route_host_to_target, +}; +#endif /* CONFIG_RTNETLINK */ + +static abi_long netlink_audit_target_to_host(void *buf, size_t len) +{ + return target_to_host_nlmsg_audit(buf, len); +} + +static abi_long netlink_audit_host_to_target(void *buf, size_t len) +{ + return host_to_target_nlmsg_audit(buf, len); +} + +static TargetFdTrans target_netlink_audit_trans = { + .target_to_host_data = netlink_audit_target_to_host, + .host_to_target_data = netlink_audit_host_to_target, +}; + /* do_socket() Must return target values and target errnos. 
*/ static abi_long do_socket(int domain, int type, int protocol) { @@ -2176,8 +2849,14 @@ static abi_long do_socket(int domain, int type, int protocol) return ret; } - if (domain == PF_NETLINK) - return -TARGET_EAFNOSUPPORT; + if (domain == PF_NETLINK && !( +#ifdef CONFIG_RTNETLINK + protocol == NETLINK_ROUTE || +#endif + protocol == NETLINK_KOBJECT_UEVENT || + protocol == NETLINK_AUDIT)) { + return -TARGET_EPFNOSUPPORT; /* target errno, not host */ + } if (domain == AF_PACKET || (domain == AF_INET && type == SOCK_PACKET)) { @@ -2192,6 +2871,22 @@ static abi_long do_socket(int domain, int type, int protocol) * if socket type is SOCK_PACKET, bind by name */ fd_trans_register(ret, &target_packet_trans); + } else if (domain == PF_NETLINK) { + switch (protocol) { +#ifdef CONFIG_RTNETLINK + case NETLINK_ROUTE: + fd_trans_register(ret, &target_netlink_route_trans); + break; +#endif + case NETLINK_KOBJECT_UEVENT: + /* nothing to do: messages are strings */ + break; + case NETLINK_AUDIT: + fd_trans_register(ret, &target_netlink_audit_trans); + break; + default: + g_assert_not_reached(); + } } } return ret; @@ -2234,7 +2929,7 @@ static abi_long do_connect(int sockfd, abi_ulong target_addr, if (ret) return ret; - return get_errno(connect(sockfd, addr, addrlen)); + return get_errno(safe_connect(sockfd, addr, addrlen)); } /* do_sendrecvmsg_locked() Must return target values and target errnos.
*/ @@ -2276,14 +2971,25 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, msg.msg_iov = vec; if (send) { - ret = target_to_host_cmsg(&msg, msgp); - if (ret == 0) - ret = get_errno(sendmsg(fd, &msg, flags)); + if (fd_trans_target_to_host_data(fd)) { + ret = fd_trans_target_to_host_data(fd)(msg.msg_iov->iov_base, + msg.msg_iov->iov_len); + } else { + ret = target_to_host_cmsg(&msg, msgp); + } + if (ret == 0) { + ret = get_errno(safe_sendmsg(fd, &msg, flags)); + } } else { - ret = get_errno(recvmsg(fd, &msg, flags)); + ret = get_errno(safe_recvmsg(fd, &msg, flags)); if (!is_error(ret)) { len = ret; - ret = host_to_target_cmsg(msgp, &msg); + if (fd_trans_host_to_target_data(fd)) { + ret = fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base, + MIN(msg.msg_iov->iov_len, len)); + } else { + ret = host_to_target_cmsg(msgp, &msg); + } if (!is_error(ret)) { msgp->msg_namelen = tswap32(msg.msg_namelen); if (msg.msg_name != NULL) { @@ -2369,19 +3075,6 @@ static abi_long do_sendrecvmmsg(int fd, abi_ulong target_msgvec, return ret; } -/* If we don't have a system accept4() then just call accept. - * The callsites to do_accept4() will ensure that they don't - * pass a non-zero flags argument in this config. - */ -#ifndef CONFIG_ACCEPT4 -static inline int accept4(int sockfd, struct sockaddr *addr, - socklen_t *addrlen, int flags) -{ - assert(flags == 0); - return accept(sockfd, addr, addrlen); -} -#endif - /* do_accept4() Must return target values and target errnos.
*/ static abi_long do_accept4(int fd, abi_ulong target_addr, abi_ulong target_addrlen_addr, int flags) @@ -2394,7 +3087,7 @@ static abi_long do_accept4(int fd, abi_ulong target_addr, host_flags = target_to_host_bitmask(flags, fcntl_flags_tbl); if (target_addr == 0) { - return get_errno(accept4(fd, NULL, NULL, host_flags)); + return get_errno(safe_accept4(fd, NULL, NULL, host_flags)); } /* linux returns EINVAL if addrlen pointer is invalid */ @@ -2410,7 +3103,7 @@ static abi_long do_accept4(int fd, abi_ulong target_addr, addr = alloca(addrlen); - ret = get_errno(accept4(fd, addr, &addrlen, host_flags)); + ret = get_errno(safe_accept4(fd, addr, &addrlen, host_flags)); if (!is_error(ret)) { host_to_target_sockaddr(target_addr, addr, addrlen); if (put_user_u32(addrlen, target_addrlen_addr)) @@ -2510,6 +3203,13 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags, host_msg = lock_user(VERIFY_READ, msg, len, 1); if (!host_msg) return -TARGET_EFAULT; + if (fd_trans_target_to_host_data(fd)) { + ret = fd_trans_target_to_host_data(fd)(host_msg, len); + if (ret < 0) { + unlock_user(host_msg, msg, 0); + return ret; + } + } if (target_addr) { addr = alloca(addrlen+1); ret = target_to_host_sockaddr(fd, addr, target_addr, addrlen); @@ -2517,9 +3217,9 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags, unlock_user(host_msg, msg, 0); return ret; } - ret = get_errno(sendto(fd, host_msg, len, flags, addr, addrlen)); + ret = get_errno(safe_sendto(fd, host_msg, len, flags, addr, addrlen)); } else { - ret = get_errno(send(fd, host_msg, len, flags)); + ret = get_errno(safe_sendto(fd, host_msg, len, flags, NULL, 0)); } unlock_user(host_msg, msg, 0); return ret; @@ -2548,10 +3248,11 @@ static abi_long do_recvfrom(int fd, abi_ulong msg, size_t len, int flags, goto fail; } addr = alloca(addrlen); - ret = get_errno(recvfrom(fd, host_msg, len, flags, addr, &addrlen)); + ret = get_errno(safe_recvfrom(fd, host_msg, len, flags, + addr, &addrlen)); } 
else { addr = NULL; /* To keep compiler quiet. */ - ret = get_errno(qemu_recv(fd, host_msg, len, flags)); + ret = get_errno(safe_recvfrom(fd, host_msg, len, flags, NULL, 0)); } if (!is_error(ret)) { if (target_addr) { @@ -2989,7 +3690,7 @@ static inline abi_long do_semop(int semid, abi_long ptr, unsigned nsops) if (target_to_host_sembuf(sops, ptr, nsops)) return -TARGET_EFAULT; - return get_errno(semop(semid, sops, nsops)); + return get_errno(safe_semtimedop(semid, sops, nsops, NULL)); } struct target_msqid_ds @@ -3144,7 +3845,7 @@ static inline abi_long do_msgsnd(int msqid, abi_long msgp, } host_mb->mtype = (abi_long) tswapal(target_mb->mtype); memcpy(host_mb->mtext, target_mb->mtext, msgsz); - ret = get_errno(msgsnd(msqid, host_mb, msgsz, msgflg)); + ret = get_errno(safe_msgsnd(msqid, host_mb, msgsz, msgflg)); g_free(host_mb); unlock_user_struct(target_mb, msgp, 0); @@ -3172,7 +3873,7 @@ static inline abi_long do_msgrcv(int msqid, abi_long msgp, ret = -TARGET_ENOMEM; goto end; } - ret = get_errno(msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg)); + ret = get_errno(safe_msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg)); if (ret > 0) { abi_ulong target_mtext_addr = msgp + sizeof(abi_ulong); @@ -3588,7 +4289,7 @@ static abi_long do_ioctl_fs_ioc_fiemap(const IOCTLEntry *ie, uint8_t *buf_temp, memcpy(fm, buf_temp, sizeof(struct fiemap)); free_fm = 1; } - ret = get_errno(ioctl(fd, ie->host_cmd, fm)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, fm)); if (!is_error(ret)) { target_size_out = target_size_in; /* An extent_count of 0 means we were only counting the extents @@ -3678,7 +4379,7 @@ static abi_long do_ioctl_ifconf(const IOCTLEntry *ie, uint8_t *buf_temp, host_ifconf->ifc_len = host_ifc_len; host_ifconf->ifc_buf = host_ifc_buf; - ret = get_errno(ioctl(fd, ie->host_cmd, host_ifconf)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, host_ifconf)); if (!is_error(ret)) { /* convert host ifc_len to target ifc_len */ @@ -3807,7 +4508,7 @@ static abi_long do_ioctl_dm(const 
IOCTLEntry *ie, uint8_t *buf_temp, int fd, } unlock_user(argptr, guest_data, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (!is_error(ret)) { guest_data = arg + host_dm->data_start; guest_data_size = host_dm->data_size - host_dm->data_start; @@ -3988,7 +4689,7 @@ static abi_long do_ioctl_blkpg(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, /* Swizzle the data pointer to our local copy and call! */ host_blkpg->data = &host_part; - ret = get_errno(ioctl(fd, ie->host_cmd, host_blkpg)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, host_blkpg)); out: return ret; @@ -4049,7 +4750,7 @@ static abi_long do_ioctl_rt(const IOCTLEntry *ie, uint8_t *buf_temp, } unlock_user(argptr, arg, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (*host_rt_dev_ptr != 0) { unlock_user((void *)*host_rt_dev_ptr, *target_rt_dev_ptr, 0); @@ -4061,7 +4762,7 @@ static abi_long do_ioctl_kdsigaccept(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, int cmd, abi_long arg) { int sig = target_to_host_signal(arg); - return get_errno(ioctl(fd, ie->host_cmd, sig)); + return get_errno(safe_ioctl(fd, ie->host_cmd, sig)); } static IOCTLEntry ioctl_entries[] = { @@ -4105,18 +4806,18 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg) switch(arg_type[0]) { case TYPE_NULL: /* no argument */ - ret = get_errno(ioctl(fd, ie->host_cmd)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd)); break; case TYPE_PTRVOID: case TYPE_INT: - ret = get_errno(ioctl(fd, ie->host_cmd, arg)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, arg)); break; case TYPE_PTR: arg_type++; target_size = thunk_type_size(arg_type, 0); switch(ie->access) { case IOC_R: - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (!is_error(ret)) { argptr = lock_user(VERIFY_WRITE, arg, target_size, 0); if (!argptr) @@ -4131,7 +4832,7 @@ static 
abi_long do_ioctl(int fd, int cmd, abi_long arg) return -TARGET_EFAULT; thunk_convert(buf_temp, argptr, arg_type, THUNK_HOST); unlock_user(argptr, arg, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); break; default: case IOC_RW: @@ -4140,7 +4841,7 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg) return -TARGET_EFAULT; thunk_convert(buf_temp, argptr, arg_type, THUNK_HOST); unlock_user(argptr, arg, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (!is_error(ret)) { argptr = lock_user(VERIFY_WRITE, arg, target_size, 0); if (!argptr) @@ -4741,6 +5442,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, new_cpu->opaque = ts; ts->bprm = parent_ts->bprm; ts->info = parent_ts->info; + ts->signal_mask = parent_ts->signal_mask; nptl_flags = flags; flags &= ~CLONE_NPTL_FLAGS2; @@ -4795,6 +5497,11 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) { return -TARGET_EINVAL; } + + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } + fork_start(); ret = fork(); if (ret == 0) { @@ -5993,8 +6700,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, However in threaded applictions it is used for thread termination, and _exit_group is used for application termination. Do thread termination if we have more then one thread. */ - /* FIXME: This probably breaks if a signal arrives. We should probably - be disabling signals. 
*/ + + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + break; + } + if (CPU_NEXT(first_cpu)) { TaskState *ts; @@ -6412,7 +7123,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_pause /* not on alpha */ case TARGET_NR_pause: - ret = get_errno(pause()); + if (!block_signals()) { + sigsuspend(&((TaskState *)cpu->opaque)->signal_mask); + } + ret = -TARGET_EINTR; break; #endif #ifdef TARGET_NR_utime @@ -6515,7 +7229,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = 0; break; case TARGET_NR_kill: - ret = get_errno(kill(arg1, target_to_host_signal(arg2))); + ret = get_errno(safe_kill(arg1, target_to_host_signal(arg2))); break; #ifdef TARGET_NR_rename case TARGET_NR_rename: @@ -6836,9 +7550,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { sigset_t cur_set; abi_ulong target_set; - do_sigprocmask(0, NULL, &cur_set); - host_to_target_old_sigset(&target_set, &cur_set); - ret = target_set; + ret = do_sigprocmask(0, NULL, &cur_set); + if (!ret) { + host_to_target_old_sigset(&target_set, &cur_set); + ret = target_set; + } } break; #endif @@ -6847,12 +7563,20 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { sigset_t set, oset, cur_set; abi_ulong target_set = arg1; - do_sigprocmask(0, NULL, &cur_set); + /* We only have one word of the new mask so we must read + * the rest of it with do_sigprocmask() and OR in this word. + * We are guaranteed that a do_sigprocmask() that only queries + * the signal mask will not fail. 
+ */ + ret = do_sigprocmask(0, NULL, &cur_set); + assert(!ret); target_to_host_old_sigset(&set, &target_set); sigorset(&set, &set, &cur_set); - do_sigprocmask(SIG_SETMASK, &set, &oset); - host_to_target_old_sigset(&target_set, &oset); - ret = target_set; + ret = do_sigprocmask(SIG_SETMASK, &set, &oset); + if (!ret) { + host_to_target_old_sigset(&target_set, &oset); + ret = target_set; + } } break; #endif @@ -6881,7 +7605,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, mask = arg2; target_to_host_old_sigset(&set, &mask); - ret = get_errno(do_sigprocmask(how, &set, &oldset)); + ret = do_sigprocmask(how, &set, &oldset); if (!is_error(ret)) { host_to_target_old_sigset(&mask, &oldset); ret = mask; @@ -6915,7 +7639,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, how = 0; set_ptr = NULL; } - ret = get_errno(do_sigprocmask(how, set_ptr, &oldset)); + ret = do_sigprocmask(how, set_ptr, &oldset); if (!is_error(ret) && arg3) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0))) goto efault; @@ -6955,7 +7679,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, how = 0; set_ptr = NULL; } - ret = get_errno(do_sigprocmask(how, set_ptr, &oldset)); + ret = do_sigprocmask(how, set_ptr, &oldset); if (!is_error(ret) && arg3) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0))) goto efault; @@ -6993,28 +7717,36 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef TARGET_NR_sigsuspend case TARGET_NR_sigsuspend: { - sigset_t set; + TaskState *ts = cpu->opaque; #if defined(TARGET_ALPHA) abi_ulong mask = arg1; - target_to_host_old_sigset(&set, &mask); + target_to_host_old_sigset(&ts->sigsuspend_mask, &mask); #else if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1))) goto efault; - target_to_host_old_sigset(&set, p); + target_to_host_old_sigset(&ts->sigsuspend_mask, p); unlock_user(p, arg1, 0); #endif - ret = get_errno(sigsuspend(&set)); + ret = 
get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask, + SIGSET_T_SIZE)); + if (ret != -TARGET_ERESTARTSYS) { + ts->in_sigsuspend = 1; + } } break; #endif case TARGET_NR_rt_sigsuspend: { - sigset_t set; + TaskState *ts = cpu->opaque; if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1))) goto efault; - target_to_host_sigset(&set, p); + target_to_host_sigset(&ts->sigsuspend_mask, p); unlock_user(p, arg1, 0); - ret = get_errno(sigsuspend(&set)); + ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask, + SIGSET_T_SIZE)); + if (ret != -TARGET_ERESTARTSYS) { + ts->in_sigsuspend = 1; + } } break; case TARGET_NR_rt_sigtimedwait: @@ -7033,7 +7765,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } else { puts = NULL; } - ret = get_errno(sigtimedwait(&set, &uinfo, puts)); + ret = get_errno(safe_rt_sigtimedwait(&set, &uinfo, puts, + SIGSET_T_SIZE)); if (!is_error(ret)) { if (arg2) { p = lock_user(VERIFY_WRITE, arg2, sizeof(target_siginfo_t), @@ -7060,11 +7793,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_sigreturn case TARGET_NR_sigreturn: - ret = do_sigreturn(cpu_env); + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + } else { + ret = do_sigreturn(cpu_env); + } break; #endif case TARGET_NR_rt_sigreturn: - ret = do_rt_sigreturn(cpu_env); + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + } else { + ret = do_rt_sigreturn(cpu_env); + } break; case TARGET_NR_sethostname: if (!(p = lock_user_string(arg1))) @@ -7221,7 +7962,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, /* Extract the two packed args for the sigset */ if (arg6) { sig_ptr = &sig; - sig.size = _NSIG / 8; + sig.size = SIGSET_T_SIZE; arg7 = lock_user(VERIFY_READ, arg6, sizeof(*arg7) * 2, 1); if (!arg7) { @@ -7594,11 +8335,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_accept4 case TARGET_NR_accept4: -#ifdef CONFIG_ACCEPT4 ret = do_accept4(arg1, arg2, arg3, arg4); -#else - goto 
unimplemented; -#endif break; #endif #ifdef TARGET_NR_bind @@ -8229,7 +8966,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct target_pollfd *target_pfd; unsigned int nfds = arg2; - int timeout = arg3; struct pollfd *pfd; unsigned int i; @@ -8249,8 +8985,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } } + switch (num) { # ifdef TARGET_NR_ppoll - if (num == TARGET_NR_ppoll) { + case TARGET_NR_ppoll: + { struct timespec _timeout_ts, *timeout_ts = &_timeout_ts; target_sigset_t *target_set; sigset_t _set, *set = &_set; @@ -8275,7 +9013,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, set = NULL; } - ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts, set, _NSIG/8)); + ret = get_errno(safe_ppoll(pfd, nfds, timeout_ts, + set, SIGSET_T_SIZE)); if (!is_error(ret) && arg3) { host_to_target_timespec(arg3, timeout_ts); @@ -8283,9 +9022,30 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (arg4) { unlock_user(target_set, arg4, 0); } - } else + break; + } # endif - ret = get_errno(poll(pfd, nfds, timeout)); +# ifdef TARGET_NR_poll + case TARGET_NR_poll: + { + struct timespec ts, *pts; + + if (arg3 >= 0) { + /* Convert ms to secs, ns */ + ts.tv_sec = arg3 / 1000; + ts.tv_nsec = (arg3 % 1000) * 1000000LL; + pts = &ts; + } else { + /* -ve poll() timeout means "infinite" */ + pts = NULL; + } + ret = get_errno(safe_ppoll(pfd, nfds, pts, NULL, 0)); + break; + } +# endif + default: + g_assert_not_reached(); + } if (!is_error(ret)) { for(i = 0; i < nfds; i++) { @@ -8299,13 +9059,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_flock: /* NOTE: the flock constant seems to be the same for every Linux platform */ - ret = get_errno(flock(arg1, arg2)); + ret = get_errno(safe_flock(arg1, arg2)); break; case TARGET_NR_readv: { struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0); if (vec != NULL) { - ret = get_errno(readv(arg1, vec, arg3)); + ret = get_errno(safe_readv(arg1, vec, arg3)); 
unlock_iovec(vec, arg2, arg3, 1); } else { ret = -host_to_target_errno(errno); @@ -8316,7 +9076,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1); if (vec != NULL) { - ret = get_errno(writev(arg1, vec, arg3)); + ret = get_errno(safe_writev(arg1, vec, arg3)); unlock_iovec(vec, arg2, arg3, 0); } else { ret = -host_to_target_errno(errno); @@ -8475,7 +9235,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct timespec req, rem; target_to_host_timespec(&req, arg1); - ret = get_errno(nanosleep(&req, &rem)); + ret = get_errno(safe_nanosleep(&req, &rem)); if (is_error(ret) && arg2) { host_to_target_timespec(arg2, &rem); } @@ -9117,9 +9877,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } mask = arg2; target_to_host_old_sigset(&set, &mask); - do_sigprocmask(how, &set, &oldset); - host_to_target_old_sigset(&mask, &oldset); - ret = mask; + ret = do_sigprocmask(how, &set, &oldset); + if (!ret) { + host_to_target_old_sigset(&mask, &oldset); + ret = mask; + } } break; #endif @@ -9287,18 +10049,56 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_arm_fadvise64_64 case TARGET_NR_arm_fadvise64_64: - { - /* - * arm_fadvise64_64 looks like fadvise64_64 but - * with different argument order - */ - abi_long temp; - temp = arg3; - arg3 = arg4; - arg4 = temp; - } + /* arm_fadvise64_64 looks like fadvise64_64 but + * with different argument order: fd, advice, offset, len + * rather than the usual fd, offset, len, advice. + * Note that offset and len are both 64-bit so appear as + * pairs of 32-bit registers. 
+ */ + ret = posix_fadvise(arg1, target_offset64(arg3, arg4), + target_offset64(arg5, arg6), arg2); + ret = -host_to_target_errno(ret); + break; #endif -#if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_arm_fadvise64_64) || defined(TARGET_NR_fadvise64) + +#if TARGET_ABI_BITS == 32 + +#ifdef TARGET_NR_fadvise64_64 + case TARGET_NR_fadvise64_64: + /* 6 args: fd, offset (high, low), len (high, low), advice */ + if (regpairs_aligned(cpu_env)) { + /* offset is in (3,4), len in (5,6) and advice in 7 */ + arg2 = arg3; + arg3 = arg4; + arg4 = arg5; + arg5 = arg6; + arg6 = arg7; + } + ret = -host_to_target_errno(posix_fadvise(arg1, + target_offset64(arg2, arg3), + target_offset64(arg4, arg5), + arg6)); + break; +#endif + +#ifdef TARGET_NR_fadvise64 + case TARGET_NR_fadvise64: + /* 5 args: fd, offset (high, low), len, advice */ + if (regpairs_aligned(cpu_env)) { + /* offset is in (3,4), len in 5 and advice in 6 */ + arg2 = arg3; + arg3 = arg4; + arg4 = arg5; + arg5 = arg6; + } + ret = -host_to_target_errno(posix_fadvise(arg1, + target_offset64(arg2, arg3), + arg4, arg5)); + break; +#endif + +#else /* not a 32-bit ABI */ +#if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_fadvise64) #ifdef TARGET_NR_fadvise64_64 case TARGET_NR_fadvise64_64: #endif @@ -9314,9 +10114,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, default: break; } #endif - ret = -posix_fadvise(arg1, arg2, arg3, arg4); - break; + ret = -host_to_target_errno(posix_fadvise(arg1, arg2, arg3, arg4)); + break; #endif +#endif /* end of 64-bit ABI fadvise handling */ + #ifdef TARGET_NR_madvise case TARGET_NR_madvise: /* A straight passthrough may not be safe because qemu sometimes @@ -9701,14 +10503,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct timespec ts; target_to_host_timespec(&ts, arg3); - ret = get_errno(clock_nanosleep(arg1, arg2, &ts, arg4 ? &ts : NULL)); + ret = get_errno(safe_clock_nanosleep(arg1, arg2, + &ts, arg4 ? 
&ts : NULL)); if (arg4) host_to_target_timespec(arg4, &ts); #if defined(TARGET_PPC) /* clock_nanosleep is odd in that it returns positive errno values. * On PPC, CR0 bit 3 should be set in such a situation. */ - if (ret) { + if (ret && ret != -TARGET_ERESTARTSYS) { ((CPUPPCState *)cpu_env)->crf[0] |= 1; } #endif @@ -9722,18 +10525,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #endif -#if defined(TARGET_NR_tkill) && defined(__NR_tkill) case TARGET_NR_tkill: - ret = get_errno(sys_tkill((int)arg1, target_to_host_signal(arg2))); + ret = get_errno(safe_tkill((int)arg1, target_to_host_signal(arg2))); break; -#endif -#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill) case TARGET_NR_tgkill: - ret = get_errno(sys_tgkill((int)arg1, (int)arg2, + ret = get_errno(safe_tgkill((int)arg1, (int)arg2, target_to_host_signal(arg3))); - break; -#endif + break; #ifdef TARGET_NR_set_robust_list case TARGET_NR_set_robust_list: @@ -9835,11 +10634,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, p = lock_user (VERIFY_READ, arg2, arg3, 1); if (arg5 != 0) { target_to_host_timespec(&ts, arg5); - ret = get_errno(mq_timedsend(arg1, p, arg3, arg4, &ts)); + ret = get_errno(safe_mq_timedsend(arg1, p, arg3, arg4, &ts)); host_to_target_timespec(arg5, &ts); + } else { + ret = get_errno(safe_mq_timedsend(arg1, p, arg3, arg4, NULL)); } - else - ret = get_errno(mq_send(arg1, p, arg3, arg4)); unlock_user (p, arg2, arg3); } break; @@ -9852,11 +10651,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, p = lock_user (VERIFY_READ, arg2, arg3, 1); if (arg5 != 0) { target_to_host_timespec(&ts, arg5); - ret = get_errno(mq_timedreceive(arg1, p, arg3, &prio, &ts)); + ret = get_errno(safe_mq_timedreceive(arg1, p, arg3, + &prio, &ts)); host_to_target_timespec(arg5, &ts); + } else { + ret = get_errno(safe_mq_timedreceive(arg1, p, arg3, + &prio, NULL)); } - else - ret = get_errno(mq_receive(arg1, p, arg3, &prio)); unlock_user (p, arg2, arg3); if (arg4 != 0) 
put_user_u32(prio, arg4); @@ -10043,14 +10844,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } #endif -#if defined(TARGET_NR_epoll_pwait) && defined(CONFIG_EPOLL_PWAIT) -#define IMPLEMENT_EPOLL_PWAIT -#endif -#if defined(TARGET_NR_epoll_wait) || defined(IMPLEMENT_EPOLL_PWAIT) +#if defined(TARGET_NR_epoll_wait) || defined(TARGET_NR_epoll_pwait) #if defined(TARGET_NR_epoll_wait) case TARGET_NR_epoll_wait: #endif -#if defined(IMPLEMENT_EPOLL_PWAIT) +#if defined(TARGET_NR_epoll_pwait) case TARGET_NR_epoll_pwait: #endif { @@ -10069,7 +10867,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ep = alloca(maxevents * sizeof(struct epoll_event)); switch (num) { -#if defined(IMPLEMENT_EPOLL_PWAIT) +#if defined(TARGET_NR_epoll_pwait) case TARGET_NR_epoll_pwait: { target_sigset_t *target_set; @@ -10088,13 +10886,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, set = NULL; } - ret = get_errno(epoll_pwait(epfd, ep, maxevents, timeout, set)); + ret = get_errno(safe_epoll_pwait(epfd, ep, maxevents, timeout, + set, SIGSET_T_SIZE)); break; } #endif #if defined(TARGET_NR_epoll_wait) case TARGET_NR_epoll_wait: - ret = get_errno(epoll_wait(epfd, ep, maxevents, timeout)); + ret = get_errno(safe_epoll_pwait(epfd, ep, maxevents, timeout, + NULL, 0)); break; #endif default: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 34af15a683..6ee9251c50 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -673,6 +673,21 @@ typedef struct { #define TARGET_SI_PAD_SIZE ((TARGET_SI_MAX_SIZE - TARGET_SI_PREAMBLE_SIZE) / sizeof(int)) +/* Within QEMU the top 16 bits of si_code indicate which of the parts of + * the union in target_siginfo is valid. This only applies between + * host_to_target_siginfo_noswap() and tswap_siginfo(); it does not + * appear either within host siginfo_t or in target_siginfo structures + * which we get from the guest userspace program. 
(The Linux kernel + * does a similar thing with using the top bits for its own internal + * purposes but not letting them be visible to userspace.) + */ +#define QEMU_SI_KILL 0 +#define QEMU_SI_TIMER 1 +#define QEMU_SI_POLL 2 +#define QEMU_SI_FAULT 3 +#define QEMU_SI_CHLD 4 +#define QEMU_SI_RT 5 + typedef struct target_siginfo { #ifdef TARGET_MIPS int si_signo; @@ -2274,34 +2289,34 @@ struct target_statfs64 { #endif struct target_flock { - short l_type; - short l_whence; - abi_ulong l_start; - abi_ulong l_len; - int l_pid; + short l_type; + short l_whence; + abi_long l_start; + abi_long l_len; + int l_pid; }; struct target_flock64 { - short l_type; - short l_whence; + short l_type; + short l_whence; #if defined(TARGET_PPC) || defined(TARGET_X86_64) || defined(TARGET_MIPS) \ || defined(TARGET_SPARC) || defined(TARGET_HPPA) \ || defined(TARGET_MICROBLAZE) || defined(TARGET_TILEGX) - int __pad; + int __pad; #endif - unsigned long long l_start; - unsigned long long l_len; - int l_pid; + abi_llong l_start; + abi_llong l_len; + int l_pid; } QEMU_PACKED; #ifdef TARGET_ARM struct target_eabi_flock64 { - short l_type; - short l_whence; - int __pad; - unsigned long long l_start; - unsigned long long l_len; - int l_pid; + short l_type; + short l_whence; + int __pad; + abi_llong l_start; + abi_llong l_len; + int l_pid; } QEMU_PACKED; #endif diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh old mode 100644 new mode 100755 index f5bba70d06..de4d1c13d4 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -1,72 +1,323 @@ #!/bin/sh # enable automatic i386/ARM/M68K/MIPS/SPARC/PPC/s390 program execution by the kernel -# load the binfmt_misc module -if [ ! -d /proc/sys/fs/binfmt_misc ]; then - /sbin/modprobe binfmt_misc -fi -if [ ! 
-f /proc/sys/fs/binfmt_misc/register ]; then - mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc -fi +qemu_target_list="i386 i486 alpha arm armeb sparc sparc32plus ppc ppc64 ppc64le m68k \ +mips mipsel mipsn32 mipsn32el mips64 mips64el \ +sh4 sh4eb s390x aarch64" -# probe cpu type -cpu=$(uname -m) -case "$cpu" in - i386|i486|i586|i686|i86pc|BePC|x86_64) - cpu="i386" - ;; - m68k) - cpu="m68k" - ;; - mips*) - cpu="mips" - ;; - "Power Macintosh"|ppc|ppc64) - cpu="ppc" - ;; - armv[4-9]*) - cpu="arm" - ;; -esac +i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00' +i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +i386_family=i386 -# register the interpreter for each cpu except for the native one -if [ $cpu != "i386" ] ; then - echo ':i386:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register - echo ':i486:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "alpha" ] ; then - echo ':alpha:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-alpha:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "arm" ] ; then - echo ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register - echo 
':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "aarch64" ] ; then - echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-aarch64:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "sparc" ] ; then - echo ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "ppc" ] ; then - echo ':ppc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-ppc:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "m68k" ] ; then - echo 'Please check cpu value and header information for m68k!' - echo ':m68k:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-m68k:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "mips" ] ; then - # FIXME: We could use the other endianness on a MIPS host. 
- echo ':mips:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mips:' > /proc/sys/fs/binfmt_misc/register - echo ':mipsel:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mipsel:' > /proc/sys/fs/binfmt_misc/register - echo ':mipsn32:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mipsn32:' > /proc/sys/fs/binfmt_misc/register - echo ':mipsn32el:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mipsn32el:' > /proc/sys/fs/binfmt_misc/register - echo ':mips64:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mips64:' > /proc/sys/fs/binfmt_misc/register - echo ':mips64el:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mips64el:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "sh" ] ; then - echo ':sh4:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-sh4:' > /proc/sys/fs/binfmt_misc/register - echo ':sh4eb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sh4eb:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "s390x" ] ; then - echo 
':s390x:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-s390x:' > /proc/sys/fs/binfmt_misc/register -fi +i486_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00' +i486_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +i486_family=i386 + +alpha_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90' +alpha_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +alpha_family=alpha + +arm_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00' +arm_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +arm_family=arm + +armeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28' +armeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +armeb_family=arm + +sparc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02' +sparc_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc_family=sparc + +sparc32plus_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x12' +sparc32plus_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc32plus_family=sparc + +ppc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14' +ppc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +ppc_family=ppc + +ppc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15' +ppc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +ppc64_family=ppc + +ppc64le_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00' 
+ppc64le_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00' +ppc64le_family=ppcle + +m68k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04' +m68k_mask='\xff\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +m68k_family=m68k + +# FIXME: We could use the other endianness on a MIPS host. + +mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_family=mips + +mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_family=mips + +mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mipsn32_family=mips + +mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsn32el_family=mips + +mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips64_family=mips + +mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mips64el_family=mips + +sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00' +sh4_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +sh4_family=sh4 + +sh4eb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a' 
+sh4eb_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sh4eb_family=sh4 + +s390x_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16' +s390x_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +s390x_family=s390x + +aarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00' +aarch64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +aarch64_family=arm + +qemu_get_family() { + cpu=${HOST_ARCH:-$(uname -m)} + case "$cpu" in + amd64|i386|i486|i586|i686|i86pc|BePC|x86_64) + echo "i386" + ;; + mips*) + echo "mips" + ;; + "Power Macintosh"|ppc64|powerpc|ppc) + echo "ppc" + ;; + ppc64el|ppc64le) + echo "ppcle" + ;; + arm|armel|armhf|arm64|armv[4-9]*) + echo "arm" + ;; + sparc*) + echo "sparc" + ;; + *) + echo "$cpu" + ;; + esac +} + +usage() { + cat <&2 + exit 1 + fi +} + +qemu_check_bintfmt_misc() { + # load the binfmt_misc module + if [ ! -d /proc/sys/fs/binfmt_misc ]; then + if ! /sbin/modprobe binfmt_misc ; then + exit 1 + fi + fi + if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then + if ! mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc ; then + exit 1 + fi + fi + + qemu_check_access /proc/sys/fs/binfmt_misc/register +} + +installed_dpkg() { + dpkg --status "$1" > /dev/null 2>&1 +} + +qemu_check_debian() { + if [ ! -e /etc/debian_version ] ; then + echo "WARNING: your system is not a Debian based distro" 1>&2 + elif ! installed_dpkg binfmt-support ; then + echo "WARNING: package binfmt-support is needed" 1>&2 + fi + qemu_check_access "$EXPORTDIR" +} + +qemu_check_systemd() { + if ! 
systemctl -q is-enabled systemd-binfmt.service ; then + echo "WARNING: systemd-binfmt.service is missing or disabled" 1>&2 + fi + qemu_check_access "$EXPORTDIR" +} + +qemu_generate_register() { + echo ":qemu-$cpu:M::$magic:$mask:$qemu:$FLAGS" +} + +qemu_register_interpreter() { + echo "Setting $qemu as binfmt interpreter for $cpu" + qemu_generate_register > /proc/sys/fs/binfmt_misc/register +} + +qemu_generate_systemd() { + echo "Setting $qemu as binfmt interpreter for $cpu for systemd-binfmt.service" + qemu_generate_register > "$EXPORTDIR/qemu-$cpu.conf" +} + +qemu_generate_debian() { + cat > "$EXPORTDIR/qemu-$cpu" <> "$EXPORTDIR/qemu-$cpu" + fi +} + +qemu_set_binfmts() { + # probe cpu type + host_family=$(qemu_get_family) + + # register the interpreter for each cpu except for the native one + + for cpu in ${qemu_target_list} ; do + magic=$(eval echo \$${cpu}_magic) + mask=$(eval echo \$${cpu}_mask) + family=$(eval echo \$${cpu}_family) + + if [ "$magic" = "" ] || [ "$mask" = "" ] || [ "$family" = "" ] ; then + echo "INTERNAL ERROR: unknown cpu $cpu" 1>&2 + continue + fi + + qemu="$QEMU_PATH/qemu-$cpu" + if [ "$cpu" = "i486" ] ; then + qemu="$QEMU_PATH/qemu-i386" + fi + + if [ "$host_family" != "$family" ] ; then + $BINFMT_SET + fi + done +} + +CHECK=qemu_check_bintfmt_misc +BINFMT_SET=qemu_register_interpreter + +SYSTEMDDIR="/etc/binfmt.d" +DEBIANDIR="/usr/share/binfmts" + +QEMU_PATH=/usr/local/bin +FLAGS="" + +options=$(getopt -o ds:Q:e:hc: -l debian,systemd:,qemu-path:,exportdir:,help,credential: -- "$@") +eval set -- "$options" + +while true ; do + case "$1" in + -d|--debian) + CHECK=qemu_check_debian + BINFMT_SET=qemu_generate_debian + EXPORTDIR=${EXPORTDIR:-$DEBIANDIR} + ;; + -s|--systemd) + CHECK=qemu_check_systemd + BINFMT_SET=qemu_generate_systemd + EXPORTDIR=${EXPORTDIR:-$SYSTEMDDIR} + shift + # check given cpu is in the supported CPU list + for cpu in ${qemu_target_list} ; do + if [ "$cpu" == "$1" ] ; then + break + fi + done + + if [ "$cpu" == "$1" ] ; 
then + qemu_target_list="$1" + else + echo "ERROR: unknown CPU \"$1\"" 1>&2 + usage + exit 1 + fi + ;; + -Q|--qemu-path) + shift + QEMU_PATH="$1" + ;; + -e|--exportdir) + shift + EXPORTDIR="$1" + ;; + -h|--help) + usage + exit 1 + ;; + -c|--credential) + shift + if [ "$1" = "yes" ] ; then + FLAGS="OC" + else + FLAGS="" + fi + ;; + *) + break + ;; + esac + shift +done + +$CHECK +qemu_set_binfmts