From a35f3ec76be8e0a5424349831127e538c6b91ef5 Mon Sep 17 00:00:00 2001 From: aurel32 Date: Tue, 8 Apr 2008 19:51:29 +0000 Subject: [PATCH] 3DNow! instruction set emulation (Michael Tross) git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4180 c046a42c-6fe2-441c-8c8c-71466251a162 --- target-i386/cpu.h | 8 +- target-i386/helper2.c | 16 +++- target-i386/ops_sse.h | 175 +++++++++++++++++++++++++++++++++++++++- target-i386/translate.c | 57 +++++++++++-- 4 files changed, 246 insertions(+), 10 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 819db7037b..1c49b3691b 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -428,8 +428,9 @@ typedef union { typedef union { uint8_t _b[8]; - uint16_t _w[2]; - uint32_t _l[1]; + uint16_t _w[4]; + uint32_t _l[2]; + float32 _s[2]; uint64_t q; } MMXReg; @@ -444,6 +445,7 @@ typedef union { #define MMX_B(n) _b[7 - (n)] #define MMX_W(n) _w[3 - (n)] #define MMX_L(n) _l[1 - (n)] +#define MMX_S(n) _s[1 - (n)] #else #define XMM_B(n) _b[n] #define XMM_W(n) _w[n] @@ -455,6 +457,7 @@ typedef union { #define MMX_B(n) _b[n] #define MMX_W(n) _w[n] #define MMX_L(n) _l[n] +#define MMX_S(n) _s[n] #endif #define MMX_Q(n) q @@ -520,6 +523,7 @@ typedef struct CPUX86State { int64_t i64; } fp_convert; + float_status mmx_status; /* for 3DNow! 
float ops */ float_status sse_status; uint32_t mxcsr; XMMReg xmm_regs[CPU_NB_REGS]; diff --git a/target-i386/helper2.c b/target-i386/helper2.c index 551a0d8b93..87b79ae7cf 100644 --- a/target-i386/helper2.c +++ b/target-i386/helper2.c @@ -150,7 +150,8 @@ static x86_def_t x86_defs[] = { CPUID_PSE36, .ext_features = CPUID_EXT_SSE3, .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | + CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, .ext3_features = CPUID_EXT3_SVM, .xlevel = 0x8000000A, }, @@ -201,6 +202,19 @@ static x86_def_t x86_defs[] = { .features = 0x0383F9FF, .xlevel = 0, }, + { + .name = "athlon", + .level = 2, + .vendor1 = 0x68747541, /* "Auth" */ + .vendor2 = 0x69746e65, /* "enti" */ + .vendor3 = 0x444d4163, /* "cAMD" */ + .family = 6, + .model = 2, + .stepping = 3, + .features = PPRO_FEATURES | PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA, + .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, + .xlevel = 0x80000008, + }, }; static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 82d1ec0a1e..6610e750f8 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -1,5 +1,5 @@ /* - * MMX/SSE/SSE2/PNI support + * MMX/3DNow!/SSE/SSE2/SSE3/PNI support * * Copyright (c) 2005 Fabrice Bellard * @@ -409,6 +409,7 @@ static inline int satsw(int x) #define FCMPEQ(a, b) (a) == (b) ? 
-1 : 0 #define FMULLW(a, b) (a) * (b) +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 #define FMULHUW(a, b) (a) * (b) >> 16 #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 @@ -455,6 +456,9 @@ SSE_OP_W(op_pcmpeqw, FCMPEQ) SSE_OP_L(op_pcmpeql, FCMPEQ) SSE_OP_W(op_pmullw, FMULLW) +#if SHIFT == 0 +SSE_OP_W(op_pmulhrw, FMULHRW) +#endif SSE_OP_W(op_pmulhuw, FMULHUW) SSE_OP_W(op_pmulhw, FMULHW) @@ -1383,6 +1387,175 @@ void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \ UNPCK_OP(l, 0) UNPCK_OP(h, 1) +/* 3DNow! float ops */ +#if SHIFT == 0 +void OPPROTO op_pi2fd(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); +} + +void OPPROTO op_pi2fw(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); +} + +void OPPROTO op_pf2id(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pf2iw(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); +} + +void OPPROTO op_pfacc(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), 
&env->mmx_status); + *d = r; +} + +void OPPROTO op_pfadd(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pfcmpeq(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void OPPROTO op_pfcmpge(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void OPPROTO op_pfcmpgt(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
-1 : 0; +} + +void OPPROTO op_pfmax(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void OPPROTO op_pfmin(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void OPPROTO op_pfmul(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pfnacc(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void OPPROTO op_pfpnacc(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void OPPROTO op_pfrcp(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = approx_rcp(s->MMX_S(0)); + d->MMX_S(1) = d->MMX_S(0); +} + +void OPPROTO op_pfrsqrt(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; + d->MMX_L(0) = 
d->MMX_L(1); +} + +void OPPROTO op_pfsub(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pfsubr(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pswapd(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_L(0) = s->MMX_L(1); + r.MMX_L(1) = s->MMX_L(0); + *d = r; +} +#endif + #undef SHIFT #undef XMM_ONLY #undef Reg diff --git a/target-i386/translate.c b/target-i386/translate.c index 356ceffbda..74ad2dc639 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2408,12 +2408,16 @@ static GenOpFunc1 *gen_sto_env_A0[3] = { }; #define SSE_SPECIAL ((GenOpFunc2 *)1) +#define SSE_DUMMY ((GenOpFunc2 *)2) #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm } #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \ gen_op_ ## x ## ss, gen_op_ ## x ## sd, } static GenOpFunc2 *sse_op_table1[256][4] = { + /* 3DNow! extensions */ + [0x0e] = { SSE_DUMMY }, /* femms */ + [0x0f] = { SSE_DUMMY }, /* pf... 
*/ /* pure SSE operations */ [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ @@ -2480,7 +2484,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { [0x74] = MMX_OP2(pcmpeqb), [0x75] = MMX_OP2(pcmpeqw), [0x76] = MMX_OP2(pcmpeql), - [0x77] = { SSE_SPECIAL }, /* emms */ + [0x77] = { SSE_DUMMY }, /* emms */ [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps }, [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ @@ -2577,6 +2581,33 @@ static GenOpFunc2 *sse_op_table4[8][4] = { SSE_FOP(cmpord), }; +static GenOpFunc2 *sse_op_table5[256] = { + [0x0c] = gen_op_pi2fw, + [0x0d] = gen_op_pi2fd, + [0x1c] = gen_op_pf2iw, + [0x1d] = gen_op_pf2id, + [0x8a] = gen_op_pfnacc, + [0x8e] = gen_op_pfpnacc, + [0x90] = gen_op_pfcmpge, + [0x94] = gen_op_pfmin, + [0x96] = gen_op_pfrcp, + [0x97] = gen_op_pfrsqrt, + [0x9a] = gen_op_pfsub, + [0x9e] = gen_op_pfadd, + [0xa0] = gen_op_pfcmpgt, + [0xa4] = gen_op_pfmax, + [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */ + [0xa7] = gen_op_movq, /* pfrsqit1 */ + [0xaa] = gen_op_pfsubr, + [0xae] = gen_op_pfacc, + [0xb0] = gen_op_pfcmpeq, + [0xb4] = gen_op_pfmul, + [0xb6] = gen_op_movq, /* pfrcpit2 */ + [0xb7] = gen_op_pmulhrw_mmx, + [0xbb] = gen_op_pswapd, + [0xbf] = gen_op_pavgb_mmx /* pavgusb */ +}; + static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) { int b1, op1_offset, op2_offset, is_xmm, val, ot; @@ -2596,7 +2627,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) sse_op2 = sse_op_table1[b][b1]; if (!sse_op2) goto illegal_op; - if (b <= 0x5f || b == 0xc6 || b == 0xc2) { + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { is_xmm = 1; } else { if (b1 == 0) { @@ -2618,8 +2649,8 @@ static void gen_sse(DisasContext *s, int b, 
target_ulong pc_start, int rex_r) } if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) goto illegal_op; - if (b == 0x77) { - /* emms */ + if (b == 0x77 || b == 0x0e) { + /* emms or femms */ gen_op_emms(); return; } @@ -3151,6 +3182,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) } } switch(b) { + case 0x0f: /* 3DNow! data insns */ + val = ldub_code(s->pc++); + sse_op2 = sse_op_table5[val]; + if (!sse_op2) + goto illegal_op; + sse_op2(op1_offset, op2_offset); + break; case 0x70: /* pshufx insn */ case 0xc6: /* pshufx insn */ val = ldub_code(s->pc++); @@ -6148,7 +6186,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_eob(s); } break; - /* MMX/SSE/SSE2/PNI support */ + /* MMX/3DNow!/SSE/SSE2/SSE3 support */ case 0x1c3: /* MOVNTI reg, mem */ if (!(s->cpuid_features & CPUID_SSE2)) goto illegal_op; @@ -6214,6 +6252,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) case 7: /* sfence / clflush */ if ((modrm & 0xc7) == 0xc0) { /* sfence */ + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ if (!(s->cpuid_features & CPUID_SSE)) goto illegal_op; } else { @@ -6227,8 +6266,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) goto illegal_op; } break; - case 0x10d: /* prefetch */ + case 0x10d: /* 3DNow! prefetch(w) */ modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); /* ignore for now */ break; @@ -6245,6 +6287,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_op_rsm(); gen_eob(s); break; + case 0x10e ... 0x10f: + /* 3DNow! instructions, ignore prefixes */ + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); case 0x110 ... 0x117: case 0x128 ... 0x12f: case 0x150 ... 0x177: