From 91e5998f185ce0f4d6e0fb40bb0721466009d620 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 25 Mar 2020 08:57:57 +0100 Subject: [PATCH 1/6] linux-user, configure: fix (again) syscall_nr.h dependencies cleanup This patch fixes two problems: - it cleans up linux-user variants (for instance ppc64-linux-user and ppc64le-linux-user) - it removes the .o file when it removes the .d file, otherwise the .o file is never updated Fixes: 5f29856b852d ("linux-user, configure: improve syscall_nr.h dependencies checking") Fixes: 4d6a835dea47 ("linux-user: introduce parameters to generate syscall_nr.h") Signed-off-by: Laurent Vivier Reviewed-by: Richard Henderson Message-Id: <20200325075757.1959961-1-laurent@vivier.eu> --- configure | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/configure b/configure index da09c35895..89fe881dd4 100755 --- a/configure +++ b/configure @@ -1910,9 +1910,11 @@ for arch in alpha hppa m68k xtensa sh4 microblaze arm ppc s390x sparc sparc64 \ # remove the file if it has been generated in the source directory rm -f "${source_path}/linux-user/${arch}/syscall_nr.h" # remove the dependency files - test -d ${arch}-linux-user && find ${arch}-linux-user -type f -name "*.d" \ - -exec grep -q "${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \ - -exec rm {} \; + for target in ${arch}*-linux-user ; do + test -d "${target}" && find "${target}" -type f -name "*.d" \ + -exec grep -q "${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \ + -print | while read file ; do rm "${file}" "${file%.d}.o" ; done + done done if test -z "$python" From 628460891dd46c25e33eec01757ac655679ea198 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 12 Feb 2020 19:22:19 -0800 Subject: [PATCH 2/6] target/i386: Renumber EXCP_SYSCALL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are not short of numbers for EXCP_*. 
There is no need to confuse things by having EXCP_VMEXIT and EXCP_SYSCALL overlap, even though the former is only used for system mode and the latter is only used for user mode. Reviewed-by: Paolo Bonzini Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Message-Id: <20200213032223.14643-2-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- target/i386/cpu.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 60d797d594..49ecc23104 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1001,9 +1001,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define EXCP11_ALGN 17 #define EXCP12_MCHK 18 -#define EXCP_SYSCALL 0x100 /* only happens in user only emulation - for syscall instruction */ -#define EXCP_VMEXIT 0x100 +#define EXCP_VMEXIT 0x100 /* only for system emulation */ +#define EXCP_SYSCALL 0x101 /* only for user emulation */ /* i386-specific interrupt pending bits. */ #define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 From acf768a904396b4a4b5fdfcb566843379dc8feb0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 12 Feb 2020 19:22:20 -0800 Subject: [PATCH 3/6] linux-user/i386: Split out gen_signal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a bit tidier than open-coding the 5 lines necessary to initialize the target_siginfo_t. In addition, this zeros the remaining bytes of the target_siginfo_t, rather than passing in garbage. 
Reviewed-by: Paolo Bonzini Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Message-Id: <20200213032223.14643-3-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/i386/cpu_loop.c | 93 ++++++++++++++------------------------ 1 file changed, 33 insertions(+), 60 deletions(-) diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index 024b6f4d58..e217cca5ee 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -81,13 +81,23 @@ static void set_idt(int n, unsigned int dpl) } #endif +static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) +{ + target_siginfo_t info = { + .si_signo = sig, + .si_code = code, + ._sifields._sigfault._addr = addr + }; + + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); +} + void cpu_loop(CPUX86State *env) { CPUState *cs = env_cpu(env); int trapnr; abi_ulong pc; abi_ulong ret; - target_siginfo_t info; for(;;) { cpu_exec_start(cs); @@ -134,70 +144,45 @@ void cpu_loop(CPUX86State *env) #endif case EXCP0B_NOSEG: case EXCP0C_STACK: - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0); break; case EXCP0D_GPF: /* XXX: potential problem if ABI32 */ #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_fault(env); - } else -#endif - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + break; } +#endif + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); break; case EXCP0E_PAGE: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - if (!(env->error_code & 1)) - info.si_code = TARGET_SEGV_MAPERR; - else - info.si_code = TARGET_SEGV_ACCERR; - info._sifields._sigfault._addr = env->cr[2]; - 
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGSEGV, + (env->error_code & 1 ? + TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), + env->cr[2]); break; case EXCP00_DIVZ: #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_trap(env, trapnr); - } else -#endif - { - /* division by zero */ - info.si_signo = TARGET_SIGFPE; - info.si_errno = 0; - info.si_code = TARGET_FPE_INTDIV; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + break; } +#endif + gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); break; case EXCP01_DB: case EXCP03_INT3: #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_trap(env, trapnr); - } else + break; + } #endif - { - info.si_signo = TARGET_SIGTRAP; - info.si_errno = 0; - if (trapnr == EXCP01_DB) { - info.si_code = TARGET_TRAP_BRKPT; - info._sifields._sigfault._addr = env->eip; - } else { - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - } - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + if (trapnr == EXCP01_DB) { + gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); + } else { + gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0); } break; case EXCP04_INTO: @@ -205,31 +190,19 @@ void cpu_loop(CPUX86State *env) #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_trap(env, trapnr); - } else -#endif - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + break; } +#endif + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); break; case EXCP06_ILLOP: - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); break; case EXCP_INTERRUPT: /* just 
indicate that signals should be handled asap */ break; case EXCP_DEBUG: - info.si_signo = TARGET_SIGTRAP; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); break; case EXCP_ATOMIC: cpu_exec_step_atomic(cs); From b26491b4d4f829fa4326d1ec2eea165a37bc6b3c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 12 Feb 2020 19:22:21 -0800 Subject: [PATCH 4/6] linux-user/i386: Emulate x86_64 vsyscalls Notice the magic page during translate, much like we already do for the arm32 commpage. At runtime, raise an exception to return to cpu_loop for emulation. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson Message-Id: <20200213032223.14643-4-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/i386/cpu_loop.c | 108 +++++++++++++++++++++++++++++++++++++ target/i386/cpu.h | 7 +++ target/i386/translate.c | 14 ++++- 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index e217cca5ee..70cde417e6 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -92,6 +92,109 @@ static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } +#ifdef TARGET_X86_64 +static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) +{ + /* + * For all the vsyscalls, NULL means "don't write anything" not + * "write it at address 0". + */ + if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) { + return true; + } + + env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; + gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); + return false; +} + +/* + * Since v3.1, the kernel traps and emulates the vsyscall page. + * Entry points other than the official generate SIGSEGV. 
+ */ +static void emulate_vsyscall(CPUX86State *env) +{ + int syscall; + abi_ulong ret; + uint64_t caller; + + /* + * Validate the entry point. We have already validated the page + * during translation to get here; now verify the offset. + */ + switch (env->eip & ~TARGET_PAGE_MASK) { + case 0x000: + syscall = TARGET_NR_gettimeofday; + break; + case 0x400: + syscall = TARGET_NR_time; + break; + case 0x800: + syscall = TARGET_NR_getcpu; + break; + default: + goto sigsegv; + } + + /* + * Validate the return address. + * Note that the kernel treats this the same as an invalid entry point. + */ + if (get_user_u64(caller, env->regs[R_ESP])) { + goto sigsegv; + } + + /* + * Validate the pointer arguments. + */ + switch (syscall) { + case TARGET_NR_gettimeofday: + if (!write_ok_or_segv(env, env->regs[R_EDI], + sizeof(struct target_timeval)) || + !write_ok_or_segv(env, env->regs[R_ESI], + sizeof(struct target_timezone))) { + return; + } + break; + case TARGET_NR_time: + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { + return; + } + break; + case TARGET_NR_getcpu: + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || + !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { + return; + } + break; + default: + g_assert_not_reached(); + } + + /* + * Perform the syscall. None of the vsyscalls should need restarting. + */ + ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], + env->regs[R_EDX], env->regs[10], env->regs[8], + env->regs[9], 0, 0); + g_assert(ret != -TARGET_ERESTARTSYS); + g_assert(ret != -TARGET_QEMU_ESIGRETURN); + if (ret == -TARGET_EFAULT) { + goto sigsegv; + } + env->regs[R_EAX] = ret; + + /* Emulate a ret instruction to leave the vsyscall page. */ + env->eip = caller; + env->regs[R_ESP] += 8; + return; + + sigsegv: + /* Like force_sig(SIGSEGV). 
*/ + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); +} +#endif + void cpu_loop(CPUX86State *env) { CPUState *cs = env_cpu(env); @@ -141,6 +244,11 @@ void cpu_loop(CPUX86State *env) env->regs[R_EAX] = ret; } break; +#endif +#ifdef TARGET_X86_64 + case EXCP_VSYSCALL: + emulate_vsyscall(env); + break; #endif case EXCP0B_NOSEG: case EXCP0C_STACK: diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 49ecc23104..9af1b0c12e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1003,6 +1003,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define EXCP_VMEXIT 0x100 /* only for system emulation */ #define EXCP_SYSCALL 0x101 /* only for user emulation */ +#define EXCP_VSYSCALL 0x102 /* only for user emulation */ /* i386-specific interrupt pending bits. */ #define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 @@ -2218,4 +2219,10 @@ static inline bool hyperv_feat_enabled(X86CPU *cpu, int feat) return !!(cpu->hyperv_features & BIT(feat)); } +#if defined(TARGET_X86_64) && \ + defined(CONFIG_USER_ONLY) && \ + defined(CONFIG_LINUX) +# define TARGET_VSYSCALL_PAGE (UINT64_C(-10) << 20) +#endif + #endif /* I386_CPU_H */ diff --git a/target/i386/translate.c b/target/i386/translate.c index d9af8f4078..5e5dbb41b0 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -8555,7 +8555,19 @@ static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - target_ulong pc_next = disas_insn(dc, cpu); + target_ulong pc_next; + +#ifdef TARGET_VSYSCALL_PAGE + /* + * Detect entry into the vsyscall page and invoke the syscall. 
+ */ + if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) { + gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next); + return; + } +#endif + + pc_next = disas_insn(dc, cpu); if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) { /* if single step mode, we generate only one instruction and From bf02adcd888f2837f6accc235a3acf69ca2e82f8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 12 Feb 2020 19:22:22 -0800 Subject: [PATCH 5/6] linux-user: Add x86_64 vsyscall page to /proc/self/maps The page isn't (necessarily) present in the host /proc/self/maps, and even if it might be it isn't present in page_flags, and even if it was it might not have the same set of page permissions. The easiest thing to do, particularly when it comes to the "[vsyscall]" note at the end of line, is to special case it. Signed-off-by: Richard Henderson Message-Id: <20200213032223.14643-5-richard.henderson@linaro.org> [lv: remove trailing space] Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 35f4146662..dbdd56e420 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7144,6 +7144,16 @@ static int open_self_maps(void *cpu_env, int fd) } } +#ifdef TARGET_VSYSCALL_PAGE + /* + * We only support execution from the vsyscall page. + * This is as if CONFIG_LEGACY_VSYSCALL_XONLY=y from v5.3. + */ + dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx + " --xp 00000000 00:00 0 [vsyscall]\n", + TARGET_VSYSCALL_PAGE, TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE); +#endif + free(line); fclose(fp); From a52f5f87bece827a338d6eb3332e3def86fb9c33 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 12 Feb 2020 19:22:23 -0800 Subject: [PATCH 6/6] linux-user: Flush out implementation of gettimeofday MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first argument, timeval, is allowed to be NULL. 
The second argument, timezone, was missing. While its use is deprecated, it is still present in the syscall. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Message-Id: <20200213032223.14643-6-richard.henderson@linaro.org> [lv: add "#if defined(TARGET_NR_gettimeofday)"] Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index dbdd56e420..49395dcea9 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -1273,6 +1273,25 @@ static inline abi_long host_to_target_timespec64(abi_ulong target_addr, return 0; } +#if defined(TARGET_NR_gettimeofday) +static inline abi_long copy_to_user_timezone(abi_ulong target_tz_addr, + struct timezone *tz) +{ + struct target_timezone *target_tz; + + if (!lock_user_struct(VERIFY_WRITE, target_tz, target_tz_addr, 1)) { + return -TARGET_EFAULT; + } + + __put_user(tz->tz_minuteswest, &target_tz->tz_minuteswest); + __put_user(tz->tz_dsttime, &target_tz->tz_dsttime); + + unlock_user_struct(target_tz, target_tz_addr, 1); + + return 0; +} +#endif + #if defined(TARGET_NR_settimeofday) static inline abi_long copy_from_user_timezone(struct timezone *tz, abi_ulong target_tz_addr) @@ -8710,10 +8729,16 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, case TARGET_NR_gettimeofday: { struct timeval tv; - ret = get_errno(gettimeofday(&tv, NULL)); + struct timezone tz; + + ret = get_errno(gettimeofday(&tv, &tz)); if (!is_error(ret)) { - if (copy_to_user_timeval(arg1, &tv)) + if (arg1 && copy_to_user_timeval(arg1, &tv)) { return -TARGET_EFAULT; + } + if (arg2 && copy_to_user_timezone(arg2, &tz)) { + return -TARGET_EFAULT; + } } } return ret;