diff --git a/configure b/configure index da09c35895..89fe881dd4 100755 --- a/configure +++ b/configure @@ -1910,9 +1910,11 @@ for arch in alpha hppa m68k xtensa sh4 microblaze arm ppc s390x sparc sparc64 \ # remove the file if it has been generated in the source directory rm -f "${source_path}/linux-user/${arch}/syscall_nr.h" # remove the dependency files - test -d ${arch}-linux-user && find ${arch}-linux-user -type f -name "*.d" \ - -exec grep -q "${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \ - -exec rm {} \; + for target in ${arch}*-linux-user ; do + test -d "${target}" && find "${target}" -type f -name "*.d" \ + -exec grep -q "${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \ + -print | while read file ; do rm "${file}" "${file%.d}.o" ; done + done done if test -z "$python" diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index 024b6f4d58..70cde417e6 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -81,13 +81,126 @@ static void set_idt(int n, unsigned int dpl) } #endif +static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) +{ + target_siginfo_t info = { + .si_signo = sig, + .si_code = code, + ._sifields._sigfault._addr = addr + }; + + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); +} + +#ifdef TARGET_X86_64 +static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) +{ + /* + * For all the vsyscalls, NULL means "don't write anything" not + * "write it at address 0". + */ + if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) { + return true; + } + + env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; + gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); + return false; +} + +/* + * Since v3.1, the kernel traps and emulates the vsyscall page. + * Entry points other than the official generate SIGSEGV. + */ +static void emulate_vsyscall(CPUX86State *env) +{ + int syscall; + abi_ulong ret; + uint64_t caller; + + /* + * Validate the entry point. We have already validated the page + * during translation to get here; now verify the offset. + */ + switch (env->eip & ~TARGET_PAGE_MASK) { + case 0x000: + syscall = TARGET_NR_gettimeofday; + break; + case 0x400: + syscall = TARGET_NR_time; + break; + case 0x800: + syscall = TARGET_NR_getcpu; + break; + default: + goto sigsegv; + } + + /* + * Validate the return address. + * Note that the kernel treats this the same as an invalid entry point. + */ + if (get_user_u64(caller, env->regs[R_ESP])) { + goto sigsegv; + } + + /* + * Validate the the pointer arguments. + */ + switch (syscall) { + case TARGET_NR_gettimeofday: + if (!write_ok_or_segv(env, env->regs[R_EDI], + sizeof(struct target_timeval)) || + !write_ok_or_segv(env, env->regs[R_ESI], + sizeof(struct target_timezone))) { + return; + } + break; + case TARGET_NR_time: + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { + return; + } + break; + case TARGET_NR_getcpu: + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || + !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { + return; + } + break; + default: + g_assert_not_reached(); + } + + /* + * Perform the syscall. None of the vsyscalls should need restarting. + */ + ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], + env->regs[R_EDX], env->regs[10], env->regs[8], + env->regs[9], 0, 0); + g_assert(ret != -TARGET_ERESTARTSYS); + g_assert(ret != -TARGET_QEMU_ESIGRETURN); + if (ret == -TARGET_EFAULT) { + goto sigsegv; + } + env->regs[R_EAX] = ret; + + /* Emulate a ret instruction to leave the vsyscall page. */ + env->eip = caller; + env->regs[R_ESP] += 8; + return; + + sigsegv: + /* Like force_sig(SIGSEGV). */ + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); +} +#endif + void cpu_loop(CPUX86State *env) { CPUState *cs = env_cpu(env); int trapnr; abi_ulong pc; abi_ulong ret; - target_siginfo_t info; for(;;) { cpu_exec_start(cs); @@ -131,73 +244,53 @@ void cpu_loop(CPUX86State *env) env->regs[R_EAX] = ret; } break; +#endif +#ifdef TARGET_X86_64 + case EXCP_VSYSCALL: + emulate_vsyscall(env); + break; #endif case EXCP0B_NOSEG: case EXCP0C_STACK: - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0); break; case EXCP0D_GPF: /* XXX: potential problem if ABI32 */ #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_fault(env); - } else -#endif - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + break; } +#endif + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); break; case EXCP0E_PAGE: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - if (!(env->error_code & 1)) - info.si_code = TARGET_SEGV_MAPERR; - else - info.si_code = TARGET_SEGV_ACCERR; - info._sifields._sigfault._addr = env->cr[2]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGSEGV, + (env->error_code & 1 ? + TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), + env->cr[2]); break; case EXCP00_DIVZ: #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_trap(env, trapnr); - } else -#endif - { - /* division by zero */ - info.si_signo = TARGET_SIGFPE; - info.si_errno = 0; - info.si_code = TARGET_FPE_INTDIV; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + break; } +#endif + gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); break; case EXCP01_DB: case EXCP03_INT3: #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_trap(env, trapnr); - } else + break; + } #endif - { - info.si_signo = TARGET_SIGTRAP; - info.si_errno = 0; - if (trapnr == EXCP01_DB) { - info.si_code = TARGET_TRAP_BRKPT; - info._sifields._sigfault._addr = env->eip; - } else { - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - } - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + if (trapnr == EXCP01_DB) { + gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); + } else { + gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0); } break; case EXCP04_INTO: @@ -205,31 +298,19 @@ void cpu_loop(CPUX86State *env) #ifndef TARGET_X86_64 if (env->eflags & VM_MASK) { handle_vm86_trap(env, trapnr); - } else -#endif - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + break; } +#endif + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); break; case EXCP06_ILLOP: - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; case EXCP_DEBUG: - info.si_signo = TARGET_SIGTRAP; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); break; case EXCP_ATOMIC: cpu_exec_step_atomic(cs); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 35f4146662..49395dcea9 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -1273,6 +1273,25 @@ static inline abi_long host_to_target_timespec64(abi_ulong target_addr, return 0; } +#if defined(TARGET_NR_gettimeofday) +static inline abi_long copy_to_user_timezone(abi_ulong target_tz_addr, + struct timezone *tz) +{ + struct target_timezone *target_tz; + + if (!lock_user_struct(VERIFY_WRITE, target_tz, target_tz_addr, 1)) { + return -TARGET_EFAULT; + } + + __put_user(tz->tz_minuteswest, &target_tz->tz_minuteswest); + __put_user(tz->tz_dsttime, &target_tz->tz_dsttime); + + unlock_user_struct(target_tz, target_tz_addr, 1); + + return 0; +} +#endif + #if defined(TARGET_NR_settimeofday) static inline abi_long copy_from_user_timezone(struct timezone *tz, abi_ulong target_tz_addr) @@ -7144,6 +7163,16 @@ static int open_self_maps(void *cpu_env, int fd) } } +#ifdef TARGET_VSYSCALL_PAGE + /* + * We only support execution from the vsyscall page. + * This is as if CONFIG_LEGACY_VSYSCALL_XONLY=y from v5.3. + */ + dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx + " --xp 00000000 00:00 0 [vsyscall]\n", + TARGET_VSYSCALL_PAGE, TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE); +#endif + free(line); fclose(fp); @@ -8700,10 +8729,16 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, case TARGET_NR_gettimeofday: { struct timeval tv; - ret = get_errno(gettimeofday(&tv, NULL)); + struct timezone tz; + + ret = get_errno(gettimeofday(&tv, &tz)); if (!is_error(ret)) { - if (copy_to_user_timeval(arg1, &tv)) + if (arg1 && copy_to_user_timeval(arg1, &tv)) { return -TARGET_EFAULT; + } + if (arg2 && copy_to_user_timezone(arg2, &tz)) { + return -TARGET_EFAULT; + } } } return ret; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 60d797d594..9af1b0c12e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1001,9 +1001,9 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define EXCP11_ALGN 17 #define EXCP12_MCHK 18 -#define EXCP_SYSCALL 0x100 /* only happens in user only emulation - for syscall instruction */ -#define EXCP_VMEXIT 0x100 +#define EXCP_VMEXIT 0x100 /* only for system emulation */ +#define EXCP_SYSCALL 0x101 /* only for user emulation */ +#define EXCP_VSYSCALL 0x102 /* only for user emulation */ /* i386-specific interrupt pending bits. */ #define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 @@ -2219,4 +2219,10 @@ static inline bool hyperv_feat_enabled(X86CPU *cpu, int feat) return !!(cpu->hyperv_features & BIT(feat)); } +#if defined(TARGET_X86_64) && \ + defined(CONFIG_USER_ONLY) && \ + defined(CONFIG_LINUX) +# define TARGET_VSYSCALL_PAGE (UINT64_C(-10) << 20) +#endif + #endif /* I386_CPU_H */ diff --git a/target/i386/translate.c b/target/i386/translate.c index d9af8f4078..5e5dbb41b0 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -8555,7 +8555,19 @@ static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - target_ulong pc_next = disas_insn(dc, cpu); + target_ulong pc_next; + +#ifdef TARGET_VSYSCALL_PAGE + /* + * Detect entry into the vsyscall page and invoke the syscall. + */ + if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) { + gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next); + return; + } +#endif + + pc_next = disas_insn(dc, cpu); if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) { /* if single step mode, we generate only one instruction and