diff --git a/Makefile.target b/Makefile.target index 34ddb7e762..5b80dd7fc9 100644 --- a/Makefile.target +++ b/Makefile.target @@ -108,7 +108,12 @@ obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal128.o ifdef CONFIG_LINUX_USER -QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user +# Note that we only add linux-user/host/$ARCH if it exists, and +# that it must come before linux-user/host/generic in the search path. +QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \ + $(patsubst %,-I%,$(wildcard $(SRC_PATH)/linux-user/host/$(ARCH))) \ + -I$(SRC_PATH)/linux-user/host/generic \ + -I$(SRC_PATH)/linux-user obj-y += linux-user/ obj-y += gdbstub.o thunk.o user-exec.o diff --git a/linux-user/Makefile.objs b/linux-user/Makefile.objs index fd5021788f..8c93058100 100644 --- a/linux-user/Makefile.objs +++ b/linux-user/Makefile.objs @@ -1,5 +1,6 @@ obj-y = main.o syscall.o strace.o mmap.o signal.o \ - elfload.o linuxload.o uaccess.o uname.o + elfload.o linuxload.o uaccess.o uname.o \ + safe-syscall.o obj-$(TARGET_HAS_BFLT) += flatload.o obj-$(TARGET_I386) += vm86.o diff --git a/linux-user/host/generic/hostdep.h b/linux-user/host/generic/hostdep.h new file mode 100644 index 0000000000..cfabc3590b --- /dev/null +++ b/linux-user/host/generic/hostdep.h @@ -0,0 +1,20 @@ +/* + * hostdep.h : fallback generic version of header for things + * which are dependent on the host architecture + * + * * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_HOSTDEP_H +#define QEMU_HOSTDEP_H + +/* This is the fallback header which is only used if the host + * architecture doesn't provide one in linux-user/host/$ARCH. + */ + +#endif diff --git a/linux-user/host/x86_64/hostdep.h b/linux-user/host/x86_64/hostdep.h new file mode 100644 index 0000000000..9dfbf3ae6a --- /dev/null +++ b/linux-user/host/x86_64/hostdep.h @@ -0,0 +1,38 @@ +/* + * hostdep.h : things which are dependent on the host architecture + * + * * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_HOSTDEP_H +#define QEMU_HOSTDEP_H + +/* We have a safe-syscall.inc.S */ +#define HAVE_SAFE_SYSCALL + +#ifndef __ASSEMBLER__ + +/* These are defined by the safe-syscall.inc.S file */ +extern char safe_syscall_start[]; +extern char safe_syscall_end[]; + +/* Adjust the signal context to rewind out of safe-syscall if we're in it */ +static inline void rewind_if_in_safe_syscall(void *puc) +{ + struct ucontext *uc = puc; + greg_t *pcreg = &uc->uc_mcontext.gregs[REG_RIP]; + + if (*pcreg > (uintptr_t)safe_syscall_start + && *pcreg < (uintptr_t)safe_syscall_end) { + *pcreg = (uintptr_t)safe_syscall_start; + } +} + +#endif /* __ASSEMBLER__ */ + +#endif diff --git a/linux-user/host/x86_64/safe-syscall.inc.S b/linux-user/host/x86_64/safe-syscall.inc.S new file mode 100644 index 0000000000..dde434c8d7 --- /dev/null +++ b/linux-user/host/x86_64/safe-syscall.inc.S @@ -0,0 +1,81 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by linux-user/safe-syscall.S + * + * Copyright (C) 2015 Timothy Edward Baldwin + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + * We return a long which is the syscall's return value, which + * may be negative-errno on failure. Conversion to the + * -1-and-errno-set convention is done by the calling wrapper. + */ +safe_syscall_base: + /* This saves a frame pointer and aligns the stack for the syscall. + * (It's unclear if the syscall ABI has the same stack alignment + * requirements as the userspace function call ABI, but better safe than + * sorry. Appendix A2 of http://www.x86-64.org/documentation/abi.pdf + * does not list any ABI differences regarding stack alignment.) + */ + push %rbp + + /* The syscall calling convention isn't the same as the + * C one: + * we enter with rdi == *signal_pending + * rsi == syscall number + * rdx, rcx, r8, r9, (stack), (stack) == syscall arguments + * and return the result in rax + * and the syscall instruction needs + * rax == syscall number + * rdi, rsi, rdx, r10, r8, r9 == syscall arguments + * and returns the result in rax + * Shuffle everything around appropriately. + * Note that syscall will trash rcx and r11. + */ + mov %rsi, %rax /* syscall number */ + mov %rdi, %rbp /* signal_pending pointer */ + /* and the syscall arguments */ + mov %rdx, %rdi + mov %rcx, %rsi + mov %r8, %rdx + mov %r9, %r10 + mov 16(%rsp), %r8 + mov 24(%rsp), %r9 + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + testl $1, (%rbp) + jnz return_ERESTARTSYS + syscall +safe_syscall_end: + /* code path for having successfully executed the syscall */ + pop %rbp + ret + +return_ERESTARTSYS: + /* code path when we didn't execute the syscall */ + mov $-TARGET_ERESTARTSYS, %rax + pop %rbp + ret + + .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 208c63eb2a..f09b750bbf 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -1,7 +1,7 @@ #ifndef QEMU_H #define QEMU_H - +#include "hostdep.h" #include "cpu.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" @@ -205,6 +205,131 @@ unsigned long init_guest_space(unsigned long host_start, #include "qemu/log.h" +/* safe_syscall.S */ + +/** + * safe_syscall: + * @int number: number of system call to make + * ...: arguments to the system call + * + * Call a system call if guest signal not pending. + * This has the same API as the libc syscall() function, except that it + * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending. + * + * Returns: the system call result, or -1 with an error code in errno + * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing + * with any of the host errno values.) + */ + +/* A guide to using safe_syscall() to handle interactions between guest + * syscalls and guest signals: + * + * Guest syscalls come in two flavours: + * + * (1) Non-interruptible syscalls + * + * These are guest syscalls that never get interrupted by signals and + * so never return EINTR. They can be implemented straightforwardly in + * QEMU: just make sure that if the implementation code has to make any + * blocking calls that those calls are retried if they return EINTR. + * It's also OK to implement these with safe_syscall, though it will be + * a little less efficient if a signal is delivered at the 'wrong' moment. + * + * (2) Interruptible syscalls + * + * These are guest syscalls that can be interrupted by signals and + * for which we need to either return EINTR or arrange for the guest + * syscall to be restarted. This category includes both syscalls which + * always restart (and in the kernel return -ERESTARTNOINTR), ones + * which only restart if there is no handler (kernel returns -ERESTARTNOHAND + * or -ERESTART_RESTARTBLOCK), and the most common kind which restart + * if the handler was registered with SA_RESTART (kernel returns + * -ERESTARTSYS). System calls which are only interruptible in some + * situations (like 'open') also need to be handled this way. + * + * Here it is important that the host syscall is made + * via this safe_syscall() function, and *not* via the host libc. + * If the host libc is used then the implementation will appear to work + * most of the time, but there will be a race condition where a + * signal could arrive just before we make the host syscall inside libc, + * and then then guest syscall will not correctly be interrupted. + * Instead the implementation of the guest syscall can use the safe_syscall + * function but otherwise just return the result or errno in the usual + * way; the main loop code will take care of restarting the syscall + * if appropriate. + * + * (If the implementation needs to make multiple host syscalls this is + * OK; any which might really block must be via safe_syscall(); for those + * which are only technically blocking (ie which we know in practice won't + * stay in the host kernel indefinitely) it's OK to use libc if necessary. + * You must be able to cope with backing out correctly if some safe_syscall + * you make in the implementation returns either -TARGET_ERESTARTSYS or + * EINTR though.) + * + * + * How and why the safe_syscall implementation works: + * + * The basic setup is that we make the host syscall via a known + * section of host native assembly. If a signal occurs, our signal + * handler checks the interrupted host PC against the addresse of that + * known section. If the PC is before or at the address of the syscall + * instruction then we change the PC to point at a "return + * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler + * (causing the safe_syscall() call to immediately return that value). + * Then in the main.c loop if we see this magic return value we adjust + * the guest PC to wind it back to before the system call, and invoke + * the guest signal handler as usual. + * + * This winding-back will happen in two cases: + * (1) signal came in just before we took the host syscall (a race); + * in this case we'll take the guest signal and have another go + * at the syscall afterwards, and this is indistinguishable for the + * guest from the timing having been different such that the guest + * signal really did win the race + * (2) signal came in while the host syscall was blocking, and the + * host kernel decided the syscall should be restarted; + * in this case we want to restart the guest syscall also, and so + * rewinding is the right thing. (Note that "restart" semantics mean + * "first call the signal handler, then reattempt the syscall".) + * The other situation to consider is when a signal came in while the + * host syscall was blocking, and the host kernel decided that the syscall + * should not be restarted; in this case QEMU's host signal handler will + * be invoked with the PC pointing just after the syscall instruction, + * with registers indicating an EINTR return; the special code in the + * handler will not kick in, and we will return EINTR to the guest as + * we should. + * + * Notice that we can leave the host kernel to make the decision for + * us about whether to do a restart of the syscall or not; we do not + * need to check SA_RESTART flags in QEMU or distinguish the various + * kinds of restartability. + */ +#ifdef HAVE_SAFE_SYSCALL +/* The core part of this function is implemented in assembly */ +extern long safe_syscall_base(int *pending, long number, ...); + +#define safe_syscall(...) \ + ({ \ + long ret_; \ + int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \ + ret_ = safe_syscall_base(psp_, __VA_ARGS__); \ + if (is_error(ret_)) { \ + errno = -ret_; \ + ret_ = -1; \ + } \ + ret_; \ + }) + +#else + +/* Fallback for architectures which don't yet provide a safe-syscall assembly + * fragment; note that this is racy! + * This should go away when all host architectures have been updated. + */ +#define safe_syscall syscall + +#endif + /* syscall.c */ int host_to_target_waitstatus(int status); diff --git a/linux-user/safe-syscall.S b/linux-user/safe-syscall.S new file mode 100644 index 0000000000..b5df6254ae --- /dev/null +++ b/linux-user/safe-syscall.S @@ -0,0 +1,30 @@ +/* + * safe-syscall.S : include the host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * + * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hostdep.h" +#include "errno_defs.h" + +/* We have the correct host directory on our include path + * so that this will pull in the right fragment for the architecture. + */ +#ifdef HAVE_SAFE_SYSCALL +#include "safe-syscall.inc.S" +#endif + +/* We must specifically say that we're happy for the stack to not be + * executable, otherwise the toolchain will default to assuming our + * assembly needs an executable stack and the whole QEMU binary will + * needlessly end up with one. This should be the last thing in this file. + */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/linux-user/signal.c b/linux-user/signal.c index 9e8555096a..ff4de4fb8f 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -561,6 +561,13 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) } } +#ifndef HAVE_SAFE_SYSCALL +static inline void rewind_if_in_safe_syscall(void *puc) +{ + /* Default version: never rewind */ +} +#endif + static void host_signal_handler(int host_signum, siginfo_t *info, void *puc) { @@ -581,6 +588,9 @@ static void host_signal_handler(int host_signum, siginfo_t *info, if (sig < 1 || sig > TARGET_NSIG) return; trace_user_host_signal(env, host_signum, sig); + + rewind_if_in_safe_syscall(puc); + host_to_target_siginfo_noswap(&tinfo, info); if (queue_signal(env, sig, &tinfo) == 1) { /* interrupt the virtual CPU as soon as possible */ diff --git a/linux-user/syscall.c b/linux-user/syscall.c index ced519d692..b6a8ed6c7a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -660,6 +660,53 @@ char *target_strerror(int err) return strerror(target_to_host_errno(err)); } +#define safe_syscall0(type, name) \ +static type safe_##name(void) \ +{ \ + return safe_syscall(__NR_##name); \ +} + +#define safe_syscall1(type, name, type1, arg1) \ +static type safe_##name(type1 arg1) \ +{ \ + return safe_syscall(__NR_##name, arg1); \ +} + +#define safe_syscall2(type, name, type1, arg1, type2, arg2) \ +static type safe_##name(type1 arg1, type2 arg2) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2); \ +} + +#define safe_syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3); \ +} + +#define safe_syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4); \ +} + +#define safe_syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5); \ +} + +#define safe_syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5, arg6); \ +} + static inline int host_to_target_sock_type(int host_type) { int target_type;