diff --git a/MAINTAINERS b/MAINTAINERS index 29aac4fce6..78d4ff227e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -349,9 +349,31 @@ PowerPC Machines 405 M: Alexander Graf L: qemu-ppc@nongnu.org -S: Maintained +S: Odd Fixes F: hw/ppc405_boards.c +Bamboo +M: Alexander Graf +L: qemu-ppc@nongnu.org +S: Odd Fixes +F: hw/ppc440_bamboo.c + +e500 +M: Alexander Graf +M: Scott Wood +L: qemu-ppc@nongnu.org +S: Supported +F: hw/ppc/e500.[hc] +F: hw/ppc/e500plat.c + +mpc8544ds +M: Alexander Graf +M: Scott Wood +L: qemu-ppc@nongnu.org +S: Supported +F: hw/ppc/mpc8544ds.c +F: hw/mpc8544_guts.c + New World M: Alexander Graf L: qemu-ppc@nongnu.org @@ -374,6 +396,19 @@ S: Odd Fixes F: hw/ppc_prep.c F: hw/prep_pci.[hc] +sPAPR +M: David Gibson +M: Alexander Graf +L: qemu-ppc@nongnu.org +S: Supported +F: hw/spapr* + +virtex_ml507 +M: Edgar E. Iglesias +L: qemu-ppc@nongnu.org +S: Odd Fixes +F: hw/virtex_ml507.c + SH4 Machines ------------ R2D @@ -457,6 +492,19 @@ S: Supported F: hw/pci* F: hw/piix* +ppc4xx +M: Alexander Graf +L: qemu-ppc@nongnu.org +S: Odd Fixes +F: hw/ppc4xx*.[hc] + +ppce500 +M: Alexander Graf +M: Scott Wood +L: qemu-ppc@nongnu.org +S: Supported +F: hw/ppce500_* + SCSI M: Paolo Bonzini S: Supported diff --git a/device_tree.c b/device_tree.c index d7a9b6bb89..a9236133c7 100644 --- a/device_tree.c +++ b/device_tree.c @@ -304,3 +304,18 @@ int qemu_devtree_add_subnode(void *fdt, const char *name) g_free(dupname); return retval; } + +void qemu_devtree_dumpdtb(void *fdt, int size) +{ + QemuOpts *machine_opts; + + machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0); + if (machine_opts) { + const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb"); + if (dumpdtb) { + /* Dump the dtb to a file and quit */ + exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1); + } + } + +} diff --git a/device_tree.h b/device_tree.h index f7a3e6cfc5..f0b3f35e03 100644 --- a/device_tree.h +++ b/device_tree.h @@ -49,4 +49,6 @@ int qemu_devtree_add_subnode(void *fdt, const char *name); sizeof(qdt_tmp)); \ } while (0) +void qemu_devtree_dumpdtb(void *fdt, int size); + #endif /* __DEVICE_TREE_H__ */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 6f0de6d959..d23f9b2f60 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -36,7 +36,7 @@ #define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" #define UIMAGE_LOAD_BASE 0 -#define DTC_LOAD_PAD 0x500000 +#define DTC_LOAD_PAD 0x1800000 #define DTC_PAD_MASK 0xFFFFF #define INITRD_LOAD_PAD 0x2000000 #define INITRD_PAD_MASK 0xFFFFFF @@ -139,12 +139,10 @@ static int ppce500_load_device_tree(CPUPPCState *env, 0x0, 0x10000, }; QemuOpts *machine_opts; - const char *dumpdtb = NULL; const char *dtb_file = NULL; machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0); if (machine_opts) { - dumpdtb = qemu_opt_get(machine_opts, "dumpdtb"); dtb_file = qemu_opt_get(machine_opts, "dtb"); toplevel_compat = qemu_opt_get(machine_opts, "dt_compatible"); } @@ -334,18 +332,7 @@ static int ppce500_load_device_tree(CPUPPCState *env, } done: - if (dumpdtb) { - /* Dump the dtb to a file and quit */ - FILE *f = fopen(dumpdtb, "wb"); - size_t len; - len = fwrite(fdt, fdt_size, 1, f); - fclose(f); - if (len != fdt_size) { - exit(1); - } - exit(0); - } - + qemu_devtree_dumpdtb(fdt, fdt_size); ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); if (ret < 0) { goto out; @@ -375,6 +362,10 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env) the device tree top */ dt_end = bi->dt_base + bi->dt_size; ps = booke206_page_size_to_tlb(dt_end) + 1; + if (ps & 1) { + /* e500v2 can only do even TLB size bits */ + ps++; + } size = (ps << MAS1_TSIZE_SHIFT); tlb->mas1 = MAS1_VALID | size; tlb->mas2 = 0; @@ -553,7 +544,8 @@ void ppce500_init(PPCE500Params *params) /* Load initrd. */ if (params->initrd_filename) { - initrd_base = (kernel_size + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK; + initrd_base = (loadaddr + kernel_size + INITRD_LOAD_PAD) & + ~INITRD_PAD_MASK; initrd_size = load_image_targphys(params->initrd_filename, initrd_base, ram_size - initrd_base); diff --git a/hw/ppc405_uc.c b/hw/ppc405_uc.c index 89e5013b57..b52ab2f179 100644 --- a/hw/ppc405_uc.c +++ b/hw/ppc405_uc.c @@ -191,7 +191,8 @@ enum { typedef struct ppc4xx_pob_t ppc4xx_pob_t; struct ppc4xx_pob_t { uint32_t bear; - uint32_t besr[2]; + uint32_t besr0; + uint32_t besr1; }; static uint32_t dcr_read_pob (void *opaque, int dcrn) @@ -205,8 +206,10 @@ static uint32_t dcr_read_pob (void *opaque, int dcrn) ret = pob->bear; break; case POB0_BESR0: + ret = pob->besr0; + break; case POB0_BESR1: - ret = pob->besr[dcrn - POB0_BESR0]; + ret = pob->besr1; break; default: /* Avoid gcc warning */ @@ -227,9 +230,12 @@ static void dcr_write_pob (void *opaque, int dcrn, uint32_t val) /* Read only */ break; case POB0_BESR0: + /* Write-clear */ + pob->besr0 &= ~val; + break; case POB0_BESR1: /* Write-clear */ - pob->besr[dcrn - POB0_BESR0] &= ~val; + pob->besr1 &= ~val; break; } } @@ -241,8 +247,8 @@ static void ppc4xx_pob_reset (void *opaque) pob = opaque; /* No error */ pob->bear = 0x00000000; - pob->besr[0] = 0x0000000; - pob->besr[1] = 0x0000000; + pob->besr0 = 0x0000000; + pob->besr1 = 0x0000000; } static void ppc4xx_pob_init(CPUPPCState *env) diff --git a/hw/spapr.c b/hw/spapr.c index 8b0c390269..09b8e99221 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -84,9 +84,11 @@ #define PHANDLE_XICP 0x00001111 +#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) + sPAPREnvironment *spapr; -int spapr_allocate_irq(int hint, enum xics_irq_type type) +int spapr_allocate_irq(int hint, bool lsi) { int irq; @@ -102,13 +104,13 @@ int spapr_allocate_irq(int hint, enum xics_irq_type type) return 0; } - xics_set_irq_type(spapr->icp, irq, type); + xics_set_irq_type(spapr->icp, irq, lsi); return irq; } /* Allocate block of consequtive IRQs, returns a number of the first */ -int spapr_allocate_irq_block(int num, enum xics_irq_type type) +int spapr_allocate_irq_block(int num, bool lsi) { int first = -1; int i; @@ -116,7 +118,7 @@ int spapr_allocate_irq_block(int num, enum xics_irq_type type) for (i = 0; i < num; ++i) { int irq; - irq = spapr_allocate_irq(0, type); + irq = spapr_allocate_irq(0, lsi); if (!irq) { return -1; } @@ -133,12 +135,13 @@ int spapr_allocate_irq_block(int num, enum xics_irq_type type) return first; } -static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr) +static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) { int ret = 0, offset; CPUPPCState *env; char cpu_model[32]; int smt = kvmppc_smt_threads(); + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; assert(spapr->cpu_model); @@ -162,8 +165,16 @@ static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr) return offset; } - ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, - sizeof(associativity)); + if (nb_numa_nodes > 1) { + ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, + sizeof(associativity)); + if (ret < 0) { + return ret; + } + } + + ret = fdt_setprop(fdt, offset, "ibm,pft-size", + pft_size_prop, sizeof(pft_size_prop)); if (ret < 0) { return ret; } @@ -205,36 +216,6 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, return (p - prop) * sizeof(uint32_t); } -static void *spapr_create_fdt_skel(const char *cpu_model, - target_phys_addr_t rma_size, - target_phys_addr_t initrd_base, - target_phys_addr_t initrd_size, - target_phys_addr_t kernel_size, - const char *boot_device, - const char *kernel_cmdline, - long hash_shift) -{ - void *fdt; - CPUPPCState *env; - uint64_t mem_reg_property[2]; - uint32_t start_prop = cpu_to_be32(initrd_base); - uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); - uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)}; - char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" - "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"; - char qemu_hypertas_prop[] = "hcall-memop1"; - uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; - int i; - char *modelname; - int smt = kvmppc_smt_threads(); - unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; - uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0), - cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(0x0)}; - char mem_name[32]; - target_phys_addr_t node0_size, mem_start; - #define _FDT(exp) \ do { \ int ret = (exp); \ @@ -245,6 +226,27 @@ static void *spapr_create_fdt_skel(const char *cpu_model, } \ } while (0) + +static void *spapr_create_fdt_skel(const char *cpu_model, + target_phys_addr_t initrd_base, + target_phys_addr_t initrd_size, + target_phys_addr_t kernel_size, + const char *boot_device, + const char *kernel_cmdline) +{ + void *fdt; + CPUPPCState *env; + uint32_t start_prop = cpu_to_be32(initrd_base); + uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); + char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" + "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"; + char qemu_hypertas_prop[] = "hcall-memop1"; + uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; + uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; + char *modelname; + int i, smt = kvmppc_smt_threads(); + unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; + fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create(fdt, FDT_MAX_SIZE))); @@ -288,55 +290,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model, _FDT((fdt_end_node(fdt))); - /* memory node(s) */ - node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; - if (rma_size > node0_size) { - rma_size = node0_size; - } - - /* RMA */ - mem_reg_property[0] = 0; - mem_reg_property[1] = cpu_to_be64(rma_size); - _FDT((fdt_begin_node(fdt, "memory@0"))); - _FDT((fdt_property_string(fdt, "device_type", "memory"))); - _FDT((fdt_property(fdt, "reg", mem_reg_property, - sizeof(mem_reg_property)))); - _FDT((fdt_property(fdt, "ibm,associativity", associativity, - sizeof(associativity)))); - _FDT((fdt_end_node(fdt))); - - /* RAM: Node 0 */ - if (node0_size > rma_size) { - mem_reg_property[0] = cpu_to_be64(rma_size); - mem_reg_property[1] = cpu_to_be64(node0_size - rma_size); - - sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size); - _FDT((fdt_begin_node(fdt, mem_name))); - _FDT((fdt_property_string(fdt, "device_type", "memory"))); - _FDT((fdt_property(fdt, "reg", mem_reg_property, - sizeof(mem_reg_property)))); - _FDT((fdt_property(fdt, "ibm,associativity", associativity, - sizeof(associativity)))); - _FDT((fdt_end_node(fdt))); - } - - /* RAM: Node 1 and beyond */ - mem_start = node0_size; - for (i = 1; i < nb_numa_nodes; i++) { - mem_reg_property[0] = cpu_to_be64(mem_start); - mem_reg_property[1] = cpu_to_be64(node_mem[i]); - associativity[3] = associativity[4] = cpu_to_be32(i); - sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start); - _FDT((fdt_begin_node(fdt, mem_name))); - _FDT((fdt_property_string(fdt, "device_type", "memory"))); - _FDT((fdt_property(fdt, "reg", mem_reg_property, - sizeof(mem_reg_property)))); - _FDT((fdt_property(fdt, "ibm,associativity", associativity, - sizeof(associativity)))); - _FDT((fdt_end_node(fdt))); - mem_start += node_mem[i]; - } - /* cpus */ _FDT((fdt_begin_node(fdt, "cpus"))); @@ -388,8 +341,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model, _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq))); _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq))); _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr))); - _FDT((fdt_property(fdt, "ibm,pft-size", - pft_size_prop, sizeof(pft_size_prop)))); _FDT((fdt_property_string(fdt, "status", "okay"))); _FDT((fdt_property(fdt, "64-bit", NULL, 0))); @@ -488,6 +439,68 @@ static void *spapr_create_fdt_skel(const char *cpu_model, return fdt; } +static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) +{ + uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0), + cpu_to_be32(0x0), cpu_to_be32(0x0), + cpu_to_be32(0x0)}; + char mem_name[32]; + target_phys_addr_t node0_size, mem_start; + uint64_t mem_reg_property[2]; + int i, off; + + /* memory node(s) */ + node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + if (spapr->rma_size > node0_size) { + spapr->rma_size = node0_size; + } + + /* RMA */ + mem_reg_property[0] = 0; + mem_reg_property[1] = cpu_to_be64(spapr->rma_size); + off = fdt_add_subnode(fdt, 0, "memory@0"); + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, + sizeof(associativity)))); + + /* RAM: Node 0 */ + if (node0_size > spapr->rma_size) { + mem_reg_property[0] = cpu_to_be64(spapr->rma_size); + mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size); + + sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size); + off = fdt_add_subnode(fdt, 0, mem_name); + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, + sizeof(associativity)))); + } + + /* RAM: Node 1 and beyond */ + mem_start = node0_size; + for (i = 1; i < nb_numa_nodes; i++) { + mem_reg_property[0] = cpu_to_be64(mem_start); + mem_reg_property[1] = cpu_to_be64(node_mem[i]); + associativity[3] = associativity[4] = cpu_to_be32(i); + sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start); + off = fdt_add_subnode(fdt, 0, mem_name); + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, + sizeof(associativity)))); + mem_start += node_mem[i]; + } + + return 0; +} + static void spapr_finalize_fdt(sPAPREnvironment *spapr, target_phys_addr_t fdt_addr, target_phys_addr_t rtas_addr, @@ -502,6 +515,12 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, /* open out the base tree into a temp buffer for the final tweaks */ _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); + ret = spapr_populate_memory(spapr, fdt); + if (ret < 0) { + fprintf(stderr, "couldn't setup memory nodes in fdt\n"); + exit(1); + } + ret = spapr_populate_vdevice(spapr->vio_bus, fdt); if (ret < 0) { fprintf(stderr, "couldn't setup vio devices in fdt\n"); @@ -524,11 +543,9 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, } /* Advertise NUMA via ibm,associativity */ - if (nb_numa_nodes > 1) { - ret = spapr_set_associativity(fdt, spapr); - if (ret < 0) { - fprintf(stderr, "Couldn't set up NUMA device tree properties\n"); - } + ret = spapr_fixup_cpu_dt(fdt, spapr); + if (ret < 0) { + fprintf(stderr, "Couldn't finalize CPU device tree properties\n"); } if (!spapr->has_graphics) { @@ -555,15 +572,49 @@ static uint64_t translate_kernel_address(void *opaque, uint64_t addr) static void emulate_spapr_hypercall(CPUPPCState *env) { - env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]); + if (msr_pr) { + hcall_dprintf("Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else { + env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]); + } } -static void spapr_reset(void *opaque) +static void spapr_reset_htab(sPAPREnvironment *spapr) { - sPAPREnvironment *spapr = (sPAPREnvironment *)opaque; + long shift; - /* flush out the hash table */ - memset(spapr->htab, 0, spapr->htab_size); + /* allocate hash page table. For now we always make this 16mb, + * later we should probably make it scale to the size of guest + * RAM */ + + shift = kvmppc_reset_htab(spapr->htab_shift); + + if (shift > 0) { + /* Kernel handles htab, we don't need to allocate one */ + spapr->htab_shift = shift; + } else { + if (!spapr->htab) { + /* Allocate an htab if we don't yet have one */ + spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); + } + + /* And clear it */ + memset(spapr->htab, 0, HTAB_SIZE(spapr)); + } + + /* Update the RMA size if necessary */ + if (spapr->vrma_adjust) { + spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift); + } +} + +static void ppc_spapr_reset(void) +{ + /* Reset the hash table & recalc the RMA */ + spapr_reset_htab(spapr); + + qemu_devices_reset(); /* Load the fdt */ spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr, @@ -580,8 +631,22 @@ static void spapr_reset(void *opaque) static void spapr_cpu_reset(void *opaque) { PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; cpu_reset(CPU(cpu)); + + /* All CPUs start halted. CPU0 is unhalted from the machine level + * reset code and the rest are explicitly started up by the guest + * using an RTAS call */ + env->halted = 1; + + env->spr[SPR_HIOR] = 0; + + env->external_htab = spapr->htab; + env->htab_base = -1; + env->htab_mask = HTAB_SIZE(spapr) - 1; + env->spr[SPR_SDR1] = (unsigned long)spapr->htab | + (spapr->htab_shift - 18); } /* Returns whether we want to use VGA or not */ @@ -613,11 +678,10 @@ static void ppc_spapr_init(ram_addr_t ram_size, int i; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); - target_phys_addr_t rma_alloc_size, rma_size; + target_phys_addr_t rma_alloc_size; uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; - long pteg_shift = 17; char *filename; msi_supported = true; @@ -634,20 +698,46 @@ static void ppc_spapr_init(ram_addr_t ram_size, hw_error("qemu: Unable to create RMA\n"); exit(1); } + if (rma_alloc_size && (rma_alloc_size < ram_size)) { - rma_size = rma_alloc_size; + spapr->rma_size = rma_alloc_size; } else { - rma_size = ram_size; + spapr->rma_size = ram_size; + + /* With KVM, we don't actually know whether KVM supports an + * unbounded RMA (PR KVM) or is limited by the hash table size + * (HV KVM using VRMA), so we always assume the latter + * + * In that case, we also limit the initial allocations for RTAS + * etc... to 256M since we have no way to know what the VRMA size + * is going to be as it depends on the size of the hash table + * isn't determined yet. + */ + if (kvm_enabled()) { + spapr->vrma_adjust = 1; + spapr->rma_size = MIN(spapr->rma_size, 0x10000000); + } } /* We place the device tree and RTAS just below either the top of the RMA, * or just below 2GB, whichever is lowere, so that it can be * processed with 32-bit real mode code if necessary */ - rtas_limit = MIN(rma_size, 0x80000000); + rtas_limit = MIN(spapr->rma_size, 0x80000000); spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; load_limit = spapr->fdt_addr - FW_OVERHEAD; + /* We aim for a hash table of size 1/128 the size of RAM. The + * normal rule of thumb is 1/64 the size of RAM, but that's much + * more than needed for the Linux guests we support. */ + spapr->htab_shift = 18; /* Minimum architected size */ + while (spapr->htab_shift <= 46) { + if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) { + break; + } + spapr->htab_shift++; + } + /* init CPUs */ if (cpu_model == NULL) { cpu_model = kvm_enabled() ? "host" : "POWER7"; @@ -662,11 +752,16 @@ static void ppc_spapr_init(ram_addr_t ram_size, /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, TIMEBASE_FREQ); - qemu_register_reset(spapr_cpu_reset, cpu); - env->hreset_vector = 0x60; + /* PAPR always has exception vectors in RAM not ROM */ env->hreset_excp_prefix = 0; - env->gpr[3] = env->cpu_index; + + /* Tell KVM that we're in PAPR mode */ + if (kvm_enabled()) { + kvmppc_set_papr(env); + } + + qemu_register_reset(spapr_cpu_reset, cpu); } /* allocate RAM */ @@ -680,27 +775,6 @@ static void ppc_spapr_init(ram_addr_t ram_size, memory_region_add_subregion(sysmem, nonrma_base, ram); } - /* allocate hash page table. For now we always make this 16mb, - * later we should probably make it scale to the size of guest - * RAM */ - spapr->htab_size = 1ULL << (pteg_shift + 7); - spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size); - - for (env = first_cpu; env != NULL; env = env->next_cpu) { - env->external_htab = spapr->htab; - env->htab_base = -1; - env->htab_mask = spapr->htab_size - 1; - - /* Tell KVM that we're in PAPR mode */ - env->spr[SPR_SDR1] = (unsigned long)spapr->htab | - ((pteg_shift + 7) - 18); - env->spr[SPR_HIOR] = 0; - - if (kvm_enabled()) { - kvmppc_set_papr(env); - } - } - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr, rtas_limit - spapr->rtas_addr); @@ -773,7 +847,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, } } - if (rma_size < (MIN_RMA_SLOF << 20)) { + if (spapr->rma_size < (MIN_RMA_SLOF << 20)) { fprintf(stderr, "qemu: pSeries SLOF firmware requires >= " "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF); exit(1); @@ -824,26 +898,19 @@ static void ppc_spapr_init(ram_addr_t ram_size, spapr->entry_point = 0x100; - /* SLOF will startup the secondary CPUs using RTAS */ - for (env = first_cpu; env != NULL; env = env->next_cpu) { - env->halted = 1; - } - /* Prepare the device tree */ - spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size, + spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, initrd_base, initrd_size, kernel_size, - boot_device, kernel_cmdline, - pteg_shift + 7); + boot_device, kernel_cmdline); assert(spapr->fdt_skel != NULL); - - qemu_register_reset(spapr_reset, spapr); } static QEMUMachine spapr_machine = { .name = "pseries", .desc = "pSeries Logical Partition (PAPR compliant)", .init = ppc_spapr_init, + .reset = ppc_spapr_reset, .max_cpus = MAX_CPUS, .no_parallel = 1, .use_scsi = 1, diff --git a/hw/spapr.h b/hw/spapr.h index ac34a171e3..e984e3fc3c 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -15,7 +15,9 @@ typedef struct sPAPREnvironment { target_phys_addr_t ram_limit; void *htab; - long htab_size; + long htab_shift; + target_phys_addr_t rma_size; + int vrma_adjust; target_phys_addr_t fdt_addr, rtas_addr; long rtas_size; void *fdt_skel; @@ -289,17 +291,17 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode, target_ulong *args); -int spapr_allocate_irq(int hint, enum xics_irq_type type); -int spapr_allocate_irq_block(int num, enum xics_irq_type type); +int spapr_allocate_irq(int hint, bool lsi); +int spapr_allocate_irq_block(int num, bool lsi); static inline int spapr_allocate_msi(int hint) { - return spapr_allocate_irq(hint, XICS_MSI); + return spapr_allocate_irq(hint, false); } static inline int spapr_allocate_lsi(int hint) { - return spapr_allocate_irq(hint, XICS_LSI); + return spapr_allocate_irq(hint, true); } static inline uint32_t rtas_ld(target_ulong phys, int n) @@ -336,6 +338,8 @@ typedef struct sPAPRTCE { void spapr_iommu_init(void); DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size); void spapr_tce_free(DMAContext *dma); +void spapr_tce_reset(DMAContext *dma); +void spapr_tce_set_bypass(DMAContext *dma, bool bypass); int spapr_dma_dt(void *fdt, int node_off, const char *propname, uint32_t liobn, uint64_t window, uint32_t size); int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c index abd847f96c..194d9c24b5 100644 --- a/hw/spapr_hcall.c +++ b/hw/spapr_hcall.c @@ -39,22 +39,6 @@ #define HPTE_V_1TB_SEG 0x4000000000000000ULL #define HPTE_V_VRMA_MASK 0x4001ffffff000000ULL -#define HPTE_V_HVLOCK 0x40ULL - -static inline int lock_hpte(void *hpte, target_ulong bits) -{ - uint64_t pteh; - - pteh = ldq_p(hpte); - - /* We're protected by qemu's global lock here */ - if (pteh & bits) { - return 0; - } - stq_p(hpte, pteh | HPTE_V_HVLOCK); - return 1; -} - static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r, target_ulong pte_index) { @@ -151,8 +135,7 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr, if (i == 8) { return H_PTEG_FULL; } - if (((ldq_p(hpte) & HPTE_V_VALID) == 0) && - lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) { + if ((ldq_p(hpte) & HPTE_V_VALID) == 0) { break; } hpte += HASH_PTE_SIZE_64; @@ -160,7 +143,7 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr, } else { i = 0; hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); - if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) { + if (ldq_p(hpte) & HPTE_V_VALID) { return H_PTEG_FULL; } } @@ -168,7 +151,6 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr, /* eieio(); FIXME: need some sort of barrier for smp? */ stq_p(hpte, pteh); - assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); args[0] = pte_index + i; return H_SUCCESS; } @@ -193,11 +175,6 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex, } hpte = env->external_htab + (ptex * HASH_PTE_SIZE_64); - while (!lock_hpte(hpte, HPTE_V_HVLOCK)) { - /* We have no real concurrency in qemu soft-emulation, so we - * will never actually have a contested lock */ - assert(0); - } v = ldq_p(hpte); r = ldq_p(hpte + (HASH_PTE_SIZE_64/2)); @@ -205,16 +182,13 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex, if ((v & HPTE_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || ((flags & H_ANDCOND) && (v & avpn) != 0)) { - stq_p(hpte, v & ~HPTE_V_HVLOCK); - assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); return REMOVE_NOT_FOUND; } - *vp = v & ~HPTE_V_HVLOCK; + *vp = v; *rp = r; stq_p(hpte, 0); rb = compute_tlbie_rb(v, r, ptex); ppc_tlb_invalidate_one(env, rb); - assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); return REMOVE_SUCCESS; } @@ -324,19 +298,12 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr, } hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); - while (!lock_hpte(hpte, HPTE_V_HVLOCK)) { - /* We have no real concurrency in qemu soft-emulation, so we - * will never actually have a contested lock */ - assert(0); - } v = ldq_p(hpte); r = ldq_p(hpte + (HASH_PTE_SIZE_64/2)); if ((v & HPTE_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { - stq_p(hpte, v & ~HPTE_V_HVLOCK); - assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); return H_NOT_FOUND; } @@ -350,8 +317,7 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr, ppc_tlb_invalidate_one(env, rb); stq_p(hpte + (HASH_PTE_SIZE_64/2), r); /* Don't need a memory barrier, due to qemu's global lock */ - stq_p(hpte, v & ~HPTE_V_HVLOCK); - assert(!(ldq_p(hpte) & HPTE_V_HVLOCK)); + stq_p(hpte, v); return H_SUCCESS; } @@ -544,6 +510,8 @@ static target_ulong h_cede(CPUPPCState *env, sPAPREnvironment *spapr, hreg_compute_hflags(env); if (!cpu_has_work(env)) { env->halted = 1; + env->exception_index = EXCP_HLT; + env->exit_request = 1; } return H_SUCCESS; } @@ -713,11 +681,6 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode, target_ulong *args) { - if (msr_pr) { - hcall_dprintf("Hypercall made with MSR[PR]=1\n"); - return H_PRIVILEGE; - } - if ((opcode <= MAX_HCALL_OPCODE) && ((opcode & 0x3) == 0)) { spapr_hcall_fn fn = papr_hypercall_table[opcode / 4]; diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c index 53b731773a..38034c07bd 100644 --- a/hw/spapr_iommu.c +++ b/hw/spapr_iommu.c @@ -42,6 +42,7 @@ struct sPAPRTCETable { uint32_t liobn; uint32_t window_size; sPAPRTCE *table; + bool bypass; int fd; QLIST_ENTRY(sPAPRTCETable) list; }; @@ -78,6 +79,12 @@ static int spapr_tce_translate(DMAContext *dma, DMA_ADDR_FMT "\n", tcet->liobn, addr); #endif + if (tcet->bypass) { + *paddr = addr; + *len = (target_phys_addr_t)-1; + return 0; + } + /* Check if we are in bound */ if (addr >= tcet->window_size) { #ifdef DEBUG_TCE @@ -162,6 +169,23 @@ void spapr_tce_free(DMAContext *dma) } } +void spapr_tce_set_bypass(DMAContext *dma, bool bypass) +{ + sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma); + + tcet->bypass = bypass; +} + +void spapr_tce_reset(DMAContext *dma) +{ + sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma); + size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT) + * sizeof(sPAPRTCE); + + tcet->bypass = false; + memset(tcet->table, 0, table_size); +} + static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, target_ulong tce) { diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index 661c05bc30..b628f89a02 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(sPAPREnvironment *spapr, /* There is no cached config, allocate MSIs */ if (!phb->msi_table[ndev].nvec) { - irq = spapr_allocate_irq_block(req_num, XICS_MSI); + irq = spapr_allocate_irq_block(req_num, true); if (irq < 0) { fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev); rtas_st(rets, 0, -1); /* Hardware error */ @@ -595,6 +595,15 @@ static int spapr_phb_init(SysBusDevice *s) return 0; } +static void spapr_phb_reset(DeviceState *qdev) +{ + SysBusDevice *s = sysbus_from_qdev(qdev); + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); + + /* Reset the IOMMU state */ + spapr_tce_reset(sphb->dma); +} + static Property spapr_phb_properties[] = { DEFINE_PROP_HEX64("buid", sPAPRPHBState, buid, 0), DEFINE_PROP_STRING("busname", sPAPRPHBState, busname), @@ -613,6 +622,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) sdc->init = spapr_phb_init; dc->props = spapr_phb_properties; + dc->reset = spapr_phb_reset; } static const TypeInfo spapr_phb_info = { diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c index ae18595150..b808f80017 100644 --- a/hw/spapr_rtas.c +++ b/hw/spapr_rtas.c @@ -184,6 +184,11 @@ static void rtas_start_cpu(sPAPREnvironment *spapr, return; } + /* This will make sure qemu state is up to date with kvm, and + * mark it dirty so our changes get flushed back before the + * new cpu enters */ + kvm_cpu_synchronize_state(env); + env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); env->nip = start; env->gpr[3] = r3; diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c index 7ca445216d..848806d3f1 100644 --- a/hw/spapr_vio.c +++ b/hw/spapr_vio.c @@ -316,17 +316,10 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq) static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev) { - VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); - uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; - if (dev->dma) { - spapr_tce_free(dev->dma); + spapr_tce_reset(dev->dma); } - dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size); - - dev->crq.qladdr = 0; - dev->crq.qsize = 0; - dev->crq.qnext = 0; + free_crq(dev); } static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token, @@ -348,16 +341,14 @@ static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token, rtas_st(rets, 0, -3); return; } - if (enable) { - spapr_tce_free(dev->dma); - dev->dma = NULL; - } else { - VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); - uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; - dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size); + if (!dev->dma) { + rtas_st(rets, 0, -3); + return; } + spapr_tce_set_bypass(dev->dma, !!enable); + rtas_st(rets, 0, 0); } @@ -409,9 +400,10 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) VIOsPAPRDevice *dev = DO_UPCAST(VIOsPAPRDevice, qdev, qdev); VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); - if (dev->crq.qsize) { - free_crq(dev); - } + /* Shut down the request queue and TCEs if necessary */ + spapr_vio_quiesce_one(dev); + + dev->signal_state = 0; if (pc->reset) { pc->reset(dev); @@ -422,7 +414,6 @@ static int spapr_vio_busdev_init(DeviceState *qdev) { VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); - uint32_t liobn; char *id; if (dev->reg != -1) { @@ -464,8 +455,10 @@ static int spapr_vio_busdev_init(DeviceState *qdev) return -1; } - liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; - dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size); + if (pc->rtce_window_size) { + uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; + dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size); + } return pc->init(dev); } diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h index ea6aa43e26..cc85d26101 100644 --- a/hw/spapr_vio.h +++ b/hw/spapr_vio.h @@ -60,7 +60,6 @@ typedef struct VIOsPAPRDeviceClass { struct VIOsPAPRDevice { DeviceState qdev; uint32_t reg; - uint32_t flags; uint32_t irq; target_ulong signal_state; VIOsPAPR_CRQ crq; @@ -132,7 +131,6 @@ void spapr_vscsi_create(VIOsPAPRBus *bus); VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus); -int spapr_tce_set_bypass(uint32_t unit, uint32_t enable); void spapr_vio_quiesce(void); #endif /* _HW_SPAPR_VIO_H */ diff --git a/hw/xics.c b/hw/xics.c index b674771dc4..ce88aa750b 100644 --- a/hw/xics.c +++ b/hw/xics.c @@ -165,11 +165,12 @@ struct ics_irq_state { int server; uint8_t priority; uint8_t saved_priority; - enum xics_irq_type type; - int asserted:1; - int sent:1; - int rejected:1; - int masked_pending:1; +#define XICS_STATUS_ASSERTED 0x1 +#define XICS_STATUS_SENT 0x2 +#define XICS_STATUS_REJECTED 0x4 +#define XICS_STATUS_MASKED_PENDING 0x8 + uint8_t status; + bool lsi; }; struct ics_state { @@ -191,8 +192,8 @@ static void resend_msi(struct ics_state *ics, int srcno) struct ics_irq_state *irq = ics->irqs + srcno; /* FIXME: filter by server#? */ - if (irq->rejected) { - irq->rejected = 0; + if (irq->status & XICS_STATUS_REJECTED) { + irq->status &= ~XICS_STATUS_REJECTED; if (irq->priority != 0xff) { icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority); @@ -204,8 +205,10 @@ static void resend_lsi(struct ics_state *ics, int srcno) { struct ics_irq_state *irq = ics->irqs + srcno; - if ((irq->priority != 0xff) && irq->asserted && !irq->sent) { - irq->sent = 1; + if ((irq->priority != 0xff) + && (irq->status & XICS_STATUS_ASSERTED) + && !(irq->status & XICS_STATUS_SENT)) { + irq->status |= XICS_STATUS_SENT; icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority); } } @@ -216,7 +219,7 @@ static void set_irq_msi(struct ics_state *ics, int srcno, int val) if (val) { if (irq->priority == 0xff) { - irq->masked_pending = 1; + irq->status |= XICS_STATUS_MASKED_PENDING; /* masked pending */ ; } else { icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority); @@ -228,7 +231,11 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val) { struct ics_irq_state *irq = ics->irqs + srcno; - irq->asserted = val; + if (val) { + irq->status |= XICS_STATUS_ASSERTED; + } else { + irq->status &= ~XICS_STATUS_ASSERTED; + } resend_lsi(ics, srcno); } @@ -237,7 +244,7 @@ static void ics_set_irq(void *opaque, int srcno, int val) struct ics_state *ics = (struct ics_state *)opaque; struct ics_irq_state *irq = ics->irqs + srcno; - if (irq->type == XICS_LSI) { + if (irq->lsi) { set_irq_lsi(ics, srcno, val); } else { set_irq_msi(ics, srcno, val); @@ -248,11 +255,12 @@ static void write_xive_msi(struct ics_state *ics, int srcno) { struct ics_irq_state *irq = ics->irqs + srcno; - if (!irq->masked_pending || (irq->priority == 0xff)) { + if (!(irq->status & XICS_STATUS_MASKED_PENDING) + || (irq->priority == 0xff)) { return; } - irq->masked_pending = 0; + irq->status &= ~XICS_STATUS_MASKED_PENDING; icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority); } @@ -262,15 +270,16 @@ static void write_xive_lsi(struct ics_state *ics, int srcno) } static void ics_write_xive(struct ics_state *ics, int nr, int server, - uint8_t priority) + uint8_t priority, uint8_t saved_priority) { int srcno = nr - ics->offset; struct ics_irq_state *irq = ics->irqs + srcno; irq->server = server; irq->priority = priority; + irq->saved_priority = saved_priority; - if (irq->type == XICS_LSI) { + if (irq->lsi) { write_xive_lsi(ics, srcno); } else { write_xive_msi(ics, srcno); @@ -281,8 +290,8 @@ static void ics_reject(struct ics_state *ics, int nr) { struct ics_irq_state *irq = ics->irqs + nr - ics->offset; - irq->rejected = 1; /* Irrelevant but harmless for LSI */ - irq->sent = 0; /* Irrelevant but harmless for MSI */ + irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */ + irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */ } static void ics_resend(struct ics_state *ics) @@ -293,7 +302,7 @@ static void ics_resend(struct ics_state *ics) struct ics_irq_state *irq = ics->irqs + i; /* FIXME: filter by server#? */ - if (irq->type == XICS_LSI) { + if (irq->lsi) { resend_lsi(ics, i); } else { resend_msi(ics, i); @@ -306,8 +315,8 @@ static void ics_eoi(struct ics_state *ics, int nr) int srcno = nr - ics->offset; struct ics_irq_state *irq = ics->irqs + srcno; - if (irq->type == XICS_LSI) { - irq->sent = 0; + if (irq->lsi) { + irq->status &= ~XICS_STATUS_SENT; } } @@ -325,14 +334,12 @@ qemu_irq xics_get_qirq(struct icp_state *icp, int irq) return icp->ics->qirqs[irq - icp->ics->offset]; } -void xics_set_irq_type(struct icp_state *icp, int irq, - enum xics_irq_type type) +void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi) { assert((irq >= icp->ics->offset) && (irq < (icp->ics->offset + icp->ics->nr_irqs))); - assert((type == XICS_MSI) || (type == XICS_LSI)); - icp->ics->irqs[irq - icp->ics->offset].type = type; + icp->ics->irqs[irq - icp->ics->offset].lsi = lsi; } static target_ulong h_cppr(CPUPPCState *env, sPAPREnvironment *spapr, @@ -399,7 +406,7 @@ static void rtas_set_xive(sPAPREnvironment *spapr, uint32_t token, return; } - ics_write_xive(ics, nr, server, priority); + ics_write_xive(ics, nr, server, priority, priority); rtas_st(rets, 0, 0); /* Success */ } @@ -447,14 +454,8 @@ static void rtas_int_off(sPAPREnvironment *spapr, uint32_t token, return; } - /* This is a NOP for now, since the described PAPR semantics don't - * seem to gel with what Linux does */ -#if 0 - struct ics_irq_state *irq = xics->irqs + (nr - xics->offset); - - irq->saved_priority = irq->priority; - ics_write_xive_msi(xics, nr, irq->server, 0xff); -#endif + ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff, + ics->irqs[nr - ics->offset].priority); rtas_st(rets, 0, 0); /* Success */ } @@ -478,22 +479,40 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token, return; } - /* This is a NOP for now, since the described PAPR semantics don't - * seem to gel with what Linux does */ -#if 0 - struct ics_irq_state *irq = xics->irqs + (nr - xics->offset); - - ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority); -#endif + ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, + ics->irqs[nr - ics->offset].saved_priority, + ics->irqs[nr - ics->offset].saved_priority); rtas_st(rets, 0, 0); /* Success */ } +static void xics_reset(void *opaque) +{ + struct icp_state *icp = (struct icp_state *)opaque; + struct ics_state *ics = icp->ics; + int i; + + for (i = 0; i < icp->nr_servers; i++) { + icp->ss[i].xirr = 0; + icp->ss[i].pending_priority = 0; + icp->ss[i].mfrr = 0xff; + /* Make all outputs are deasserted */ + qemu_set_irq(icp->ss[i].output, 0); + } + + for (i = 0; i < ics->nr_irqs; i++) { + /* Reset everything *except* the type */ + ics->irqs[i].server = 0; + ics->irqs[i].status = 0; + ics->irqs[i].priority = 0xff; + ics->irqs[i].saved_priority = 0xff; + } +} + struct icp_state *xics_system_init(int nr_irqs) { CPUPPCState *env; int max_server_num; - int i; struct icp_state *icp; struct ics_state *ics; @@ -508,10 +527,6 @@ struct icp_state *xics_system_init(int nr_irqs) icp->nr_servers = max_server_num + 1; icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state)); - for (i = 0; i < icp->nr_servers; i++) { - icp->ss[i].mfrr = 0xff; - } - for (env = first_cpu; env != NULL; env = env->next_cpu) { struct icp_server_state *ss = &icp->ss[env->cpu_index]; @@ -539,11 +554,6 @@ struct icp_state *xics_system_init(int nr_irqs) icp->ics = ics; ics->icp = icp; - for (i = 0; i < nr_irqs; i++) { - ics->irqs[i].priority = 0xff; - ics->irqs[i].saved_priority = 0xff; - } - ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs); spapr_register_hypercall(H_CPPR, h_cppr); @@ -556,5 +566,7 @@ struct icp_state *xics_system_init(int nr_irqs) spapr_rtas_register("ibm,int-off", rtas_int_off); spapr_rtas_register("ibm,int-on", rtas_int_on); + qemu_register_reset(xics_reset, icp); + return icp; } diff --git a/hw/xics.h b/hw/xics.h index 99b96ac85a..6817268697 100644 --- a/hw/xics.h +++ b/hw/xics.h @@ -31,14 +31,8 @@ struct icp_state; -enum xics_irq_type { - XICS_MSI, /* Message-signalled (edge) interrupt */ - XICS_LSI, /* Level-signalled interrupt */ -}; - qemu_irq xics_get_qirq(struct icp_state *icp, int irq); -void xics_set_irq_type(struct icp_state *icp, int irq, - enum xics_irq_type type); +void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi); struct icp_state *xics_system_init(int nr_irqs); diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index ca2fc2198e..faf4404078 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1079,7 +1079,6 @@ struct CPUPPCState { int mmu_idx; /* precomputed MMU index to speed up mem accesses */ /* Power management */ - int power_mode; int (*check_pow)(CPUPPCState *env); #if !defined(CONFIG_USER_ONLY) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index f638b2a07c..f39b4f682a 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -287,23 +287,6 @@ target_ulong helper_602_mfrom(target_ulong arg) for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--) #endif -/* If X is a NaN, store the corresponding QNaN into RESULT. Otherwise, - * execute the following block. */ -#define DO_HANDLE_NAN(result, x) \ - if (float32_is_any_nan(x)) { \ - CPU_FloatU __f; \ - __f.f = x; \ - __f.l = __f.l | (1 << 22); /* Set QNaN bit. */ \ - result = __f.f; \ - } else - -#define HANDLE_NAN1(result, x) \ - DO_HANDLE_NAN(result, x) -#define HANDLE_NAN2(result, x, y) \ - DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) -#define HANDLE_NAN3(result, x, y, z) \ - DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z) - /* Saturating arithmetic helpers. */ #define SATCVT(from, to, from_type, to_type, min, max) \ static inline to_type cvt##from##to(from_type x, int *sat) \ @@ -409,15 +392,29 @@ VARITH(uwm, u32) int i; \ \ for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ - HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) { \ - r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \ - } \ + r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \ } \ } VARITHFP(addfp, float32_add) VARITHFP(subfp, float32_sub) +VARITHFP(minfp, float32_min) +VARITHFP(maxfp, float32_max) #undef VARITHFP +#define VARITHFPFMA(suffix, type) \ + void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ + ppc_avr_t *b, ppc_avr_t *c) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \ + type, &env->vec_status); \ + } \ + } +VARITHFPFMA(maddfp, 0); +VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); +#undef VARITHFPFMA + #define VARITHSAT_CASE(type, op, cvt, element) \ { \ type result = (type)a->element[i] op (type)b->element[i]; \ @@ -649,27 +646,6 @@ VCT(uxs, cvtsduw, u32) VCT(sxs, cvtsdsw, s32) #undef VCT -void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, - ppc_avr_t *c) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(r->f); i++) { - HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) { - /* Need to do the computation in higher precision and round - * once at the end. */ - float64 af, bf, cf, t; - - af = float32_to_float64(a->f[i], &env->vec_status); - bf = float32_to_float64(b->f[i], &env->vec_status); - cf = float32_to_float64(c->f[i], &env->vec_status); - t = float64_mul(af, cf, &env->vec_status); - t = float64_add(t, bf, &env->vec_status); - r->f[i] = float64_to_float32(t, &env->vec_status); - } - } -} - void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { @@ -730,27 +706,6 @@ VMINMAX(uw, u32) #undef VMINMAX_DO #undef VMINMAX -#define VMINMAXFP(suffix, rT, rF) \ - void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ - ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ - HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) { \ - if (float32_lt_quiet(a->f[i], b->f[i], \ - &env->vec_status)) { \ - r->f[i] = rT->f[i]; \ - } else { \ - r->f[i] = rF->f[i]; \ - } \ - } \ - } \ - } -VMINMAXFP(minfp, a, b) -VMINMAXFP(maxfp, b, a) -#undef VMINMAXFP - void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { int i; @@ -930,28 +885,6 @@ VMUL(uh, u16, u32) #undef VMUL_DO #undef VMUL -void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, - ppc_avr_t *b, ppc_avr_t *c) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(r->f); i++) { - HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) { - /* Need to do the computation is higher precision and round - * once at the end. */ - float64 af, bf, cf, t; - - af = float32_to_float64(a->f[i], &env->vec_status); - bf = float32_to_float64(b->f[i], &env->vec_status); - cf = float32_to_float64(c->f[i], &env->vec_status); - t = float64_mul(af, cf, &env->vec_status); - t = float64_sub(t, bf, &env->vec_status); - t = float64_chs(t); - r->f[i] = float64_to_float32(t, &env->vec_status); - } - } -} - void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { @@ -1039,9 +972,7 @@ void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) int i; for (i = 0; i < ARRAY_SIZE(r->f); i++) { - HANDLE_NAN1(r->f[i], b->f[i]) { - r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); - } + r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); } } @@ -1054,9 +985,7 @@ void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) \ set_float_rounding_mode(rounding, &s); \ for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ - HANDLE_NAN1(r->f[i], b->f[i]) { \ - r->f[i] = float32_round_to_int (b->f[i], &s); \ - } \ + r->f[i] = float32_round_to_int (b->f[i], &s); \ } \ } VRFI(n, float_round_nearest_even) @@ -1089,11 +1018,9 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) int i; for (i = 0; i < ARRAY_SIZE(r->f); i++) { - HANDLE_NAN1(r->f[i], b->f[i]) { - float32 t = float32_sqrt(b->f[i], &env->vec_status); + float32 t = float32_sqrt(b->f[i], &env->vec_status); - r->f[i] = float32_div(float32_one, t, &env->vec_status); - } + r->f[i] = float32_div(float32_one, t, &env->vec_status); } } @@ -1109,9 +1036,7 @@ void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) int i; for (i = 0; i < ARRAY_SIZE(r->f); i++) { - HANDLE_NAN1(r->f[i], b->f[i]) { - r->f[i] = float32_exp2(b->f[i], &env->vec_status); - } + r->f[i] = float32_exp2(b->f[i], &env->vec_status); } } @@ -1120,9 +1045,7 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) int i; for (i = 0; i < ARRAY_SIZE(r->f); i++) { - HANDLE_NAN1(r->f[i], b->f[i]) { - r->f[i] = float32_log2(b->f[i], &env->vec_status); - } + r->f[i] = float32_log2(b->f[i], &env->vec_status); } } @@ -1473,10 +1396,6 @@ VUPK(lsh, s32, s16, UPKLO) #undef UPKHI #undef UPKLO -#undef DO_HANDLE_NAN -#undef HANDLE_NAN1 -#undef HANDLE_NAN2 -#undef HANDLE_NAN3 #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 7f6e4e0b87..5cbe98a164 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -60,6 +60,7 @@ static int cap_booke_sregs; static int cap_ppc_smt; static int cap_ppc_rma; static int cap_spapr_tce; +static int cap_hior; /* XXX We have a race condition where we actually have a level triggered * interrupt, but the infrastructure can't expose that yet, so the guest @@ -86,6 +87,7 @@ int kvm_arch_init(KVMState *s) cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); + cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); if (!cap_interrupt_level) { fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " @@ -469,6 +471,54 @@ int kvm_arch_put_registers(CPUPPCState *env, int level) env->tlb_dirty = false; } + if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { + struct kvm_sregs sregs; + + sregs.pvr = env->spr[SPR_PVR]; + + sregs.u.s.sdr1 = env->spr[SPR_SDR1]; + + /* Sync SLB */ +#ifdef TARGET_PPC64 + for (i = 0; i < 64; i++) { + sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; + sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; + } +#endif + + /* Sync SRs */ + for (i = 0; i < 16; i++) { + sregs.u.s.ppc32.sr[i] = env->sr[i]; + } + + /* Sync BATs */ + for (i = 0; i < 8; i++) { + /* Beware. We have to swap upper and lower bits here */ + sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) + | env->DBAT[1][i]; + sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) + | env->IBAT[1][i]; + } + + ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs); + if (ret) { + return ret; + } + } + + if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { + uint64_t hior = env->spr[SPR_HIOR]; + struct kvm_one_reg reg = { + .id = KVM_REG_PPC_HIOR, + .addr = (uintptr_t) &hior, + }; + + ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + } + return ret; } @@ -946,52 +996,14 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) void kvmppc_set_papr(CPUPPCState *env) { struct kvm_enable_cap cap = {}; - struct kvm_one_reg reg = {}; - struct kvm_sregs sregs = {}; int ret; - uint64_t hior = env->spr[SPR_HIOR]; cap.cap = KVM_CAP_PPC_PAPR; ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap); if (ret) { - goto fail; + cpu_abort(env, "This KVM version does not support PAPR\n"); } - - /* - * XXX We set HIOR here. It really should be a qdev property of - * the CPU node, but we don't have CPUs converted to qdev yet. - * - * Once we have qdev CPUs, move HIOR to a qdev property and - * remove this chunk. - */ - reg.id = KVM_REG_PPC_HIOR; - reg.addr = (uintptr_t)&hior; - ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, ®); - if (ret) { - fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n" - "kernel with support for HV KVM but no PAPR PR \n" - "KVM in which case things will work. If they don't \n" - "please update your host kernel!\n"); - } - - /* Set SDR1 so kernel space finds the HTAB */ - ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs); - if (ret) { - goto fail; - } - - sregs.u.s.sdr1 = env->spr[SPR_SDR1]; - - ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs); - if (ret) { - goto fail; - } - - return; - -fail: - cpu_abort(env, "This KVM version does not support PAPR\n"); } int kvmppc_smt_threads(void) @@ -999,6 +1011,7 @@ int kvmppc_smt_threads(void) return cap_ppc_smt ? cap_ppc_smt : 1; } +#ifdef TARGET_PPC64 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem) { void *rma; @@ -1042,6 +1055,16 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem) return size; } +uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) +{ + if (cap_ppc_rma >= 2) { + return current_size; + } + return MIN(current_size, + getrampagesize() << (hash_shift - 7)); +} +#endif + void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) { struct kvm_create_spapr_tce args = { @@ -1101,6 +1124,44 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size) return 0; } +int kvmppc_reset_htab(int shift_hint) +{ + uint32_t shift = shift_hint; + + if (!kvm_enabled()) { + /* Full emulation, tell caller to allocate htab itself */ + return 0; + } + if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { + int ret; + ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); + if (ret == -ENOTTY) { + /* At least some versions of PR KVM advertise the + * capability, but don't implement the ioctl(). Oops. + * Return 0 so that we allocate the htab in qemu, as is + * correct for PR. */ + return 0; + } else if (ret < 0) { + return ret; + } + return shift; + } + + /* We have a kernel that predates the htab reset calls. For PR + * KVM, we need to allocate the htab ourselves, for an HV KVM of + * this era, it has allocated a 16MB fixed size hash table + * already. Kernels of this era have the GET_PVINFO capability + * only on PR, so we use this hack to determine the right + * answer */ + if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) { + /* PR - tell caller to allocate htab */ + return 0; + } else { + /* HV - assume 16MB kernel allocated htab */ + return 24; + } +} + static inline uint32_t mfpvr(void) { uint32_t pvr; diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index e2f8703853..baad6eb75b 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -27,6 +27,8 @@ int kvmppc_smt_threads(void); off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem); void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd); int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); +int kvmppc_reset_htab(int shift_hint); +uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); #endif /* !CONFIG_USER_ONLY */ const ppc_def_t *kvmppc_host_cpu_def(void); int kvmppc_fixup_cpu(CPUPPCState *env); @@ -94,6 +96,23 @@ static inline int kvmppc_remove_spapr_tce(void *table, int pfd, { return -1; } + +static inline int kvmppc_reset_htab(int shift_hint) +{ + return -1; +} + +static inline uint64_t kvmppc_rma_size(uint64_t current_size, + unsigned int hash_shift) +{ + return ram_size; +} + +static inline int kvmppc_update_sdr1(CPUPPCState *env) +{ + return 0; +} + #endif /* !CONFIG_USER_ONLY */ static inline const ppc_def_t *kvmppc_host_cpu_def(void) diff --git a/target-ppc/machine.c b/target-ppc/machine.c index d6c2ee41b3..21ce7575e3 100644 --- a/target-ppc/machine.c +++ b/target-ppc/machine.c @@ -82,7 +82,7 @@ void cpu_save(QEMUFile *f, void *opaque) qemu_put_betls(f, &env->hflags); qemu_put_betls(f, &env->hflags_nmsr); qemu_put_sbe32s(f, &env->mmu_idx); - qemu_put_sbe32s(f, &env->power_mode); + qemu_put_sbe32(f, 0); } int cpu_load(QEMUFile *f, void *opaque, int version_id) @@ -167,7 +167,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) qemu_get_betls(f, &env->hflags); qemu_get_betls(f, &env->hflags_nmsr); qemu_get_sbe32s(f, &env->mmu_idx); - qemu_get_sbe32s(f, &env->power_mode); + qemu_get_sbe32(f); /* Discard unused power_mode */ return 0; } diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index fba2b42427..a972287035 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -10423,6 +10423,14 @@ static void ppc_cpu_reset(CPUState *s) env->pending_interrupts = 0; env->exception_index = POWERPC_EXCP_NONE; env->error_code = 0; + +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) + env->vpa = 0; + env->slb_shadow = 0; + env->dispatch_trace_log = 0; + env->dtl_size = 0; +#endif /* TARGET_PPC64 */ + /* Flush all TLBs */ tlb_flush(env, 1); }