diff --git a/hw/spapr.c b/hw/spapr.c index c24c92b1a0..57140d231d 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -52,12 +52,15 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, sPAPREnvironment *spapr, target_phys_addr_t initrd_base, target_phys_addr_t initrd_size, - const char *kernel_cmdline) + const char *kernel_cmdline, + long hash_shift) { void *fdt; uint64_t mem_reg_property[] = { 0, cpu_to_be64(ramsize) }; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); + uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)}; + char hypertas_prop[] = "hcall-pft\0hcall-term"; int i; char *modelname; int ret; @@ -145,6 +148,8 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, * full emu, for kvm we should copy it from the host */ _FDT((fdt_property_cell(fdt, "clock-frequency", 1000000000))); _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_property(fdt, "ibm,pft-size", + pft_size_prop, sizeof(pft_size_prop)))); _FDT((fdt_property_string(fdt, "status", "okay"))); _FDT((fdt_property(fdt, "64-bit", NULL, 0))); @@ -160,6 +165,14 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, _FDT((fdt_end_node(fdt))); + /* RTAS */ + _FDT((fdt_begin_node(fdt, "rtas"))); + + _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop, + sizeof(hypertas_prop)))); + + _FDT((fdt_end_node(fdt))); + /* vdevice */ _FDT((fdt_begin_node(fdt, "vdevice"))); @@ -208,12 +221,13 @@ static void ppc_spapr_init(ram_addr_t ram_size, const char *cpu_model) { CPUState *envs[MAX_CPUS]; - void *fdt; + void *fdt, *htab; int i; ram_addr_t ram_offset; target_phys_addr_t fdt_addr; uint32_t kernel_base, initrd_base; - long kernel_size, initrd_size; + long kernel_size, initrd_size, htab_size; + long pteg_shift = 17; int fdt_size; spapr = qemu_malloc(sizeof(*spapr)); @@ -250,6 +264,18 @@ static void ppc_spapr_init(ram_addr_t ram_size, ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size); cpu_register_physical_memory(0, ram_size, ram_offset); + /* allocate hash page table. For now we always make this 16mb, + * later we should probably make it scale to the size of guest + * RAM */ + htab_size = 1ULL << (pteg_shift + 7); + htab = qemu_mallocz(htab_size); + + for (i = 0; i < smp_cpus; i++) { + envs[i]->external_htab = htab; + envs[i]->htab_base = -1; + envs[i]->htab_mask = htab_size - 1; + } + spapr->vio_bus = spapr_vio_bus_init(); for (i = 0; i < MAX_SERIAL_PORTS; i++) { @@ -296,7 +322,8 @@ static void ppc_spapr_init(ram_addr_t ram_size, /* Prepare the device tree */ fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, envs, spapr, - initrd_base, initrd_size, kernel_cmdline); + initrd_base, initrd_size, kernel_cmdline, + pteg_shift + 7); assert(fdt != NULL); cpu_physical_memory_write(fdt_addr, fdt, fdt_size); diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c index e3919d01c2..0ba17172da 100644 --- a/hw/spapr_hcall.c +++ b/hw/spapr_hcall.c @@ -1,8 +1,253 @@ #include "sysemu.h" #include "cpu.h" #include "qemu-char.h" +#include "sysemu.h" +#include "qemu-char.h" +#include "exec-all.h" #include "hw/spapr.h" +#define HPTES_PER_GROUP 8 + +#define HPTE_V_SSIZE_SHIFT 62 +#define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_AVPN 0x3fffffffffffff80ULL +#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) +#define HPTE_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) +#define HPTE_V_BOLTED 0x0000000000000010ULL +#define HPTE_V_LOCK 0x0000000000000008ULL +#define HPTE_V_LARGE 0x0000000000000004ULL +#define HPTE_V_SECONDARY 0x0000000000000002ULL +#define HPTE_V_VALID 0x0000000000000001ULL + +#define HPTE_R_PP0 0x8000000000000000ULL +#define HPTE_R_TS 0x4000000000000000ULL +#define HPTE_R_KEY_HI 0x3000000000000000ULL +#define HPTE_R_RPN_SHIFT 12 +#define HPTE_R_RPN 0x3ffffffffffff000ULL +#define HPTE_R_FLAGS 0x00000000000003ffULL +#define HPTE_R_PP 0x0000000000000003ULL +#define HPTE_R_N 0x0000000000000004ULL +#define HPTE_R_G 0x0000000000000008ULL +#define HPTE_R_M 0x0000000000000010ULL +#define HPTE_R_I 0x0000000000000020ULL +#define HPTE_R_W 0x0000000000000040ULL +#define HPTE_R_WIMG 0x0000000000000078ULL +#define HPTE_R_C 0x0000000000000080ULL +#define HPTE_R_R 0x0000000000000100ULL +#define HPTE_R_KEY_LO 0x0000000000000e00ULL + +#define HPTE_V_1TB_SEG 0x4000000000000000ULL +#define HPTE_V_VRMA_MASK 0x4001ffffff000000ULL + +#define HPTE_V_HVLOCK 0x40ULL + +static inline int lock_hpte(void *hpte, target_ulong bits) +{ + uint64_t pteh; + + pteh = ldq_p(hpte); + + /* We're protected by qemu's global lock here */ + if (pteh & bits) { + return 0; + } + stq_p(hpte, pteh | HPTE_V_HVLOCK); + return 1; +} + +static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r, + target_ulong pte_index) +{ + target_ulong rb, va_low; + + rb = (v & ~0x7fULL) << 16; /* AVA field */ + va_low = pte_index >> 3; + if (v & HPTE_V_SECONDARY) { + va_low = ~va_low; + } + /* xor vsid from AVA */ + if (!(v & HPTE_V_1TB_SEG)) { + va_low ^= v >> 12; + } else { + va_low ^= v >> 24; + } + va_low &= 0x7ff; + if (v & HPTE_V_LARGE) { + rb |= 1; /* L field */ +#if 0 /* Disable that P7 specific bit for now */ + if (r & 0xff000) { + /* non-16MB large page, must be 64k */ + /* (masks depend on page size) */ + rb |= 0x1000; /* page encoding in LP field */ + rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ + rb |= (va_low & 0xfe); /* AVAL field */ + } +#endif + } else { + /* 4kB page */ + rb |= (va_low & 0x7ff) << 12; /* remaining 11b of AVA */ + } + rb |= (v >> 54) & 0x300; /* B field */ + return rb; +} + +static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong pte_index = args[1]; + target_ulong pteh = args[2]; + target_ulong ptel = args[3]; + target_ulong porder; + target_ulong i, pa; + uint8_t *hpte; + + /* only handle 4k and 16M pages for now */ + porder = 12; + if (pteh & HPTE_V_LARGE) { +#if 0 /* We don't support 64k pages yet */ + if ((ptel & 0xf000) == 0x1000) { + /* 64k page */ + porder = 16; + } else +#endif + if ((ptel & 0xff000) == 0) { + /* 16M page */ + porder = 24; + /* lowest AVA bit must be 0 for 16M pages */ + if (pteh & 0x80) { + return H_PARAMETER; + } + } else { + return H_PARAMETER; + } + } + + pa = ptel & HPTE_R_RPN; + /* FIXME: bounds check the pa? */ + + /* Check WIMG */ + if ((ptel & HPTE_R_WIMG) != HPTE_R_M) { + return H_PARAMETER; + } + pteh &= ~0x60ULL; + + if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) { + return H_PARAMETER; + } + if (likely((flags & H_EXACT) == 0)) { + pte_index &= ~7ULL; + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); + for (i = 0; ; ++i) { + if (i == 8) { + return H_PTEG_FULL; + } + if (((ldq_p(hpte) & HPTE_V_VALID) == 0) && + lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) { + break; + } + hpte += HASH_PTE_SIZE_64; + } + } else { + i = 0; + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); + if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) { + return H_PTEG_FULL; + } + } + stq_p(hpte + (HASH_PTE_SIZE_64/2), ptel); + /* eieio(); FIXME: need some sort of barrier for smp? */ + stq_p(hpte, pteh); + + assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); + args[0] = pte_index + i; + return H_SUCCESS; +} + +static target_ulong h_remove(CPUState *env, sPAPREnvironment *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong pte_index = args[1]; + target_ulong avpn = args[2]; + uint8_t *hpte; + target_ulong v, r, rb; + + if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) { + return H_PARAMETER; + } + + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); + while (!lock_hpte(hpte, HPTE_V_HVLOCK)) { + /* We have no real concurrency in qemu soft-emulation, so we + * will never actually have a contested lock */ + assert(0); + } + + v = ldq_p(hpte); + r = ldq_p(hpte + (HASH_PTE_SIZE_64/2)); + + if ((v & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || + ((flags & H_ANDCOND) && (v & avpn) != 0)) { + stq_p(hpte, v & ~HPTE_V_HVLOCK); + assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); + return H_NOT_FOUND; + } + args[0] = v & ~HPTE_V_HVLOCK; + args[1] = r; + stq_p(hpte, 0); + rb = compute_tlbie_rb(v, r, pte_index); + ppc_tlb_invalidate_one(env, rb); + assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); + return H_SUCCESS; +} + +static target_ulong h_protect(CPUState *env, sPAPREnvironment *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong pte_index = args[1]; + target_ulong avpn = args[2]; + uint8_t *hpte; + target_ulong v, r, rb; + + if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) { + return H_PARAMETER; + } + + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); + while (!lock_hpte(hpte, HPTE_V_HVLOCK)) { + /* We have no real concurrency in qemu soft-emulation, so we + * will never actually have a contested lock */ + assert(0); + } + + v = ldq_p(hpte); + r = ldq_p(hpte + (HASH_PTE_SIZE_64/2)); + + if ((v & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { + stq_p(hpte, v & ~HPTE_V_HVLOCK); + assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); + return H_NOT_FOUND; + } + + r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | + HPTE_R_KEY_HI | HPTE_R_KEY_LO); + r |= (flags << 55) & HPTE_R_PP0; + r |= (flags << 48) & HPTE_R_KEY_HI; + r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); + rb = compute_tlbie_rb(v, r, pte_index); + stq_p(hpte, v & ~HPTE_V_VALID); + ppc_tlb_invalidate_one(env, rb); + stq_p(hpte + (HASH_PTE_SIZE_64/2), r); + /* Don't need a memory barrier, due to qemu's global lock */ + stq_p(hpte, v & ~HPTE_V_HVLOCK); + assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); + return H_SUCCESS; +} + spapr_hcall_fn hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) @@ -39,3 +284,12 @@ target_ulong spapr_hypercall(CPUState *env, target_ulong opcode, hcall_dprintf("Unimplemented hcall 0x" TARGET_FMT_lx "\n", opcode); return H_FUNCTION; } + +static void hypercall_init(void) +{ + /* hcall-pft */ + spapr_register_hypercall(H_ENTER, h_enter); + spapr_register_hypercall(H_REMOVE, h_remove); + spapr_register_hypercall(H_PROTECT, h_protect); +} +device_init(hypercall_init); diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 25d0658c47..b4c2555626 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -670,6 +670,8 @@ struct CPUPPCState { target_phys_addr_t htab_base; target_phys_addr_t htab_mask; target_ulong sr[32]; + /* externally stored hash table */ + uint8_t *external_htab; /* BATs */ int nb_BATs; target_ulong DBAT[2][8]; diff --git a/target-ppc/helper.c b/target-ppc/helper.c index 278bee4f17..5e4030bb53 100644 --- a/target-ppc/helper.c +++ b/target-ppc/helper.c @@ -589,8 +589,13 @@ static inline int _find_pte(CPUState *env, mmu_ctx_t *ctx, int is_64b, int h, for (i = 0; i < 8; i++) { #if defined(TARGET_PPC64) if (is_64b) { - pte0 = ldq_phys(env->htab_base + pteg_off + (i * 16)); - pte1 = ldq_phys(env->htab_base + pteg_off + (i * 16) + 8); + if (env->external_htab) { + pte0 = ldq_p(env->external_htab + pteg_off + (i * 16)); + pte1 = ldq_p(env->external_htab + pteg_off + (i * 16) + 8); + } else { + pte0 = ldq_phys(env->htab_base + pteg_off + (i * 16)); + pte1 = ldq_phys(env->htab_base + pteg_off + (i * 16) + 8); + } /* We have a TLB that saves 4K pages, so let's * split a huge page to 4k chunks */ @@ -606,8 +611,13 @@ static inline int _find_pte(CPUState *env, mmu_ctx_t *ctx, int is_64b, int h, } else #endif { - pte0 = ldl_phys(env->htab_base + pteg_off + (i * 8)); - pte1 = ldl_phys(env->htab_base + pteg_off + (i * 8) + 4); + if (env->external_htab) { + pte0 = ldl_p(env->external_htab + pteg_off + (i * 8)); + pte1 = ldl_p(env->external_htab + pteg_off + (i * 8) + 4); + } else { + pte0 = ldl_phys(env->htab_base + pteg_off + (i * 8)); + pte1 = ldl_phys(env->htab_base + pteg_off + (i * 8) + 4); + } r = pte32_check(ctx, pte0, pte1, h, rw, type); LOG_MMU("Load pte from " TARGET_FMT_lx " => " TARGET_FMT_lx " " TARGET_FMT_lx " %d %d %d " TARGET_FMT_lx "\n", @@ -647,13 +657,23 @@ static inline int _find_pte(CPUState *env, mmu_ctx_t *ctx, int is_64b, int h, if (pte_update_flags(ctx, &pte1, ret, rw) == 1) { #if defined(TARGET_PPC64) if (is_64b) { - stq_phys_notdirty(env->htab_base + pteg_off + (good * 16) + 8, - pte1); + if (env->external_htab) { + stq_p(env->external_htab + pteg_off + (good * 16) + 8, + pte1); + } else { + stq_phys_notdirty(env->htab_base + pteg_off + + (good * 16) + 8, pte1); + } } else #endif { - stl_phys_notdirty(env->htab_base + pteg_off + (good * 8) + 4, - pte1); + if (env->external_htab) { + stl_p(env->external_htab + pteg_off + (good * 8) + 4, + pte1); + } else { + stl_phys_notdirty(env->htab_base + pteg_off + + (good * 8) + 4, pte1); + } } } }