Merge branch 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf

* 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf: (35 commits) PPC: KVM: Fix BAT put PPC: e500: Only expose even TLB sizes in initial TLB ppc/pseries: Reset VPA registration on CPU reset pseries: Don't test for MSR_PR for hypercalls under KVM PPC: e500: calculate initrd_base like dt_base PPC: e500: increase DTC_LOAD_PAD device tree: simplify dumpdtb code fdt: move dumpdtb interpretation code to device_tree.c target-ppc: Remove unused power_mode field from cpu state pseries: Set hash table size based on RAM size pseries: Remove unnecessary locking from PAPR hash table hcalls ppc405_uc: Fix buffer overflow target-ppc: KVM: Fix some kernel version edge cases for kvmppc_reset_htab() pseries: Fix semantics of RTAS int-on, int-off and set-xive functions pseries: Rework implementation of TCE bypass pseries: Remove never used flags field from spapr vio devices pseries: Remove XICS irq type enum type pseries: Remove C bitfields from xics code pseries: Small cleanup to H_CEDE implementation pseries: Fix XICS reset ...
2012-10-06 18:51:36 +02:00 · 2012-10-06 18:51:36 +02:00 · 6b2f90fbbd
parent 1d31fca470 ef8beb0e94
commit 6b2f90fbbd
21 changed files with 576 additions and 437 deletions
--- a/50
+++ b/50
@ -349,9 +349,31 @@ PowerPC Machines
 405
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc405_boards.c

+Bamboo
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc440_bamboo.c
+
+e500
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppc/e500.[hc]
+F: hw/ppc/e500plat.c
+
+mpc8544ds
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppc/mpc8544ds.c
+F: hw/mpc8544_guts.c
+
 New World
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
@ -374,6 +396,19 @@ S: Odd Fixes
 F: hw/ppc_prep.c
 F: hw/prep_pci.[hc]

+sPAPR
+M: David Gibson <david@gibson.dropbear.id.au>
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/spapr*
+
+virtex_ml507
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/virtex_ml507.c
+
 SH4 Machines
 ------------
 R2D
@ -457,6 +492,19 @@ S: Supported
 F: hw/pci*
 F: hw/piix*

+ppc4xx
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc4xx*.[hc]
+
+ppce500
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppce500_*
+
 SCSI
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Supported
--- a/device_tree.c
+++ b/device_tree.c
@ -304,3 +304,18 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
    g_free(dupname);
    return retval;
 }
+
+void qemu_devtree_dumpdtb(void *fdt, int size)
+{
+    QemuOpts *machine_opts;
+
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (machine_opts) {
+        const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
+        if (dumpdtb) {
+            /* Dump the dtb to a file and quit */
+            exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
+        }
+    }
+
+}
--- a/device_tree.h
+++ b/device_tree.h
@ -49,4 +49,6 @@ int qemu_devtree_add_subnode(void *fdt, const char *name);
                             sizeof(qdt_tmp));                                \
    } while (0)

+void qemu_devtree_dumpdtb(void *fdt, int size);
+
 #endif /* __DEVICE_TREE_H__ */
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@ -36,7 +36,7 @@

 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
 #define UIMAGE_LOAD_BASE           0
-#define DTC_LOAD_PAD               0x500000
+#define DTC_LOAD_PAD               0x1800000
 #define DTC_PAD_MASK               0xFFFFF
 #define INITRD_LOAD_PAD            0x2000000
 #define INITRD_PAD_MASK            0xFFFFFF
@ -139,12 +139,10 @@ static int ppce500_load_device_tree(CPUPPCState *env,
            0x0, 0x10000,
        };
    QemuOpts *machine_opts;
-    const char *dumpdtb = NULL;
    const char *dtb_file = NULL;

    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    if (machine_opts) {
-        dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
        dtb_file = qemu_opt_get(machine_opts, "dtb");
        toplevel_compat = qemu_opt_get(machine_opts, "dt_compatible");
    }
@ -334,18 +332,7 @@ static int ppce500_load_device_tree(CPUPPCState *env,
    }

 done:
-    if (dumpdtb) {
-        /* Dump the dtb to a file and quit */
-        FILE *f = fopen(dumpdtb, "wb");
-        size_t len;
-        len = fwrite(fdt, fdt_size, 1, f);
-        fclose(f);
-        if (len != fdt_size) {
-            exit(1);
-        }
-        exit(0);
-    }
-
+    qemu_devtree_dumpdtb(fdt, fdt_size);
    ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
    if (ret < 0) {
        goto out;
@ -375,6 +362,10 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env)
       the device tree top */
    dt_end = bi->dt_base + bi->dt_size;
    ps = booke206_page_size_to_tlb(dt_end) + 1;
+    if (ps & 1) {
+        /* e500v2 can only do even TLB size bits */
+        ps++;
+    }
    size = (ps << MAS1_TSIZE_SHIFT);
    tlb->mas1 = MAS1_VALID | size;
    tlb->mas2 = 0;
@ -553,7 +544,8 @@ void ppce500_init(PPCE500Params *params)

    /* Load initrd. */
    if (params->initrd_filename) {
-        initrd_base = (kernel_size + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK;
+        initrd_base = (loadaddr + kernel_size + INITRD_LOAD_PAD) &
+            ~INITRD_PAD_MASK;
        initrd_size = load_image_targphys(params->initrd_filename, initrd_base,
                                          ram_size - initrd_base);

--- a/hw/ppc405_uc.c
+++ b/hw/ppc405_uc.c
@ -191,7 +191,8 @@ enum {
 typedef struct ppc4xx_pob_t ppc4xx_pob_t;
 struct ppc4xx_pob_t {
    uint32_t bear;
-    uint32_t besr[2];
+    uint32_t besr0;
+    uint32_t besr1;
 };

 static uint32_t dcr_read_pob (void *opaque, int dcrn)
@ -205,8 +206,10 @@ static uint32_t dcr_read_pob (void *opaque, int dcrn)
        ret = pob->bear;
        break;
    case POB0_BESR0:
+        ret = pob->besr0;
+        break;
    case POB0_BESR1:
-        ret = pob->besr[dcrn - POB0_BESR0];
+        ret = pob->besr1;
        break;
    default:
        /* Avoid gcc warning */
@ -227,9 +230,12 @@ static void dcr_write_pob (void *opaque, int dcrn, uint32_t val)
        /* Read only */
        break;
    case POB0_BESR0:
+        /* Write-clear */
+        pob->besr0 &= ~val;
+        break;
    case POB0_BESR1:
        /* Write-clear */
-        pob->besr[dcrn - POB0_BESR0] &= ~val;
+        pob->besr1 &= ~val;
        break;
    }
 }
@ -241,8 +247,8 @@ static void ppc4xx_pob_reset (void *opaque)
    pob = opaque;
    /* No error */
    pob->bear = 0x00000000;
-    pob->besr[0] = 0x0000000;
-    pob->besr[1] = 0x0000000;
+    pob->besr0 = 0x0000000;
+    pob->besr1 = 0x0000000;
 }

 static void ppc4xx_pob_init(CPUPPCState *env)
--- a/hw/spapr.c
+++ b/hw/spapr.c
@ -84,9 +84,11 @@

 #define PHANDLE_XICP            0x00001111

+#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
+
 sPAPREnvironment *spapr;

-int spapr_allocate_irq(int hint, enum xics_irq_type type)
+int spapr_allocate_irq(int hint, bool lsi)
 {
    int irq;

@ -102,13 +104,13 @@ int spapr_allocate_irq(int hint, enum xics_irq_type type)
        return 0;
    }

-    xics_set_irq_type(spapr->icp, irq, type);
+    xics_set_irq_type(spapr->icp, irq, lsi);

    return irq;
 }

 /* Allocate block of consequtive IRQs, returns a number of the first */
-int spapr_allocate_irq_block(int num, enum xics_irq_type type)
+int spapr_allocate_irq_block(int num, bool lsi)
 {
    int first = -1;
    int i;
@ -116,7 +118,7 @@ int spapr_allocate_irq_block(int num, enum xics_irq_type type)
    for (i = 0; i < num; ++i) {
        int irq;

-        irq = spapr_allocate_irq(0, type);
+        irq = spapr_allocate_irq(0, lsi);
        if (!irq) {
            return -1;
        }
@ -133,12 +135,13 @@ int spapr_allocate_irq_block(int num, enum xics_irq_type type)
    return first;
 }

-static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
+static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 {
    int ret = 0, offset;
    CPUPPCState *env;
    char cpu_model[32];
    int smt = kvmppc_smt_threads();
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    assert(spapr->cpu_model);

@ -162,8 +165,16 @@ static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
            return offset;
        }

-        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
-                          sizeof(associativity));
+        if (nb_numa_nodes > 1) {
+            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+                              sizeof(associativity));
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
+                          pft_size_prop, sizeof(pft_size_prop));
        if (ret < 0) {
            return ret;
        }
@ -205,36 +216,6 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
    return (p - prop) * sizeof(uint32_t);
 }

-static void *spapr_create_fdt_skel(const char *cpu_model,
-                                   target_phys_addr_t rma_size,
-                                   target_phys_addr_t initrd_base,
-                                   target_phys_addr_t initrd_size,
-                                   target_phys_addr_t kernel_size,
-                                   const char *boot_device,
-                                   const char *kernel_cmdline,
-                                   long hash_shift)
-{
-    void *fdt;
-    CPUPPCState *env;
-    uint64_t mem_reg_property[2];
-    uint32_t start_prop = cpu_to_be32(initrd_base);
-    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
-    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
-    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
-    char qemu_hypertas_prop[] = "hcall-memop1";
-    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
-    int i;
-    char *modelname;
-    int smt = kvmppc_smt_threads();
-    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
-    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
-    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0)};
-    char mem_name[32];
-    target_phys_addr_t node0_size, mem_start;
-
 #define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
@ -245,6 +226,27 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
        }                                                          \
    } while (0)

+
+static void *spapr_create_fdt_skel(const char *cpu_model,
+                                   target_phys_addr_t initrd_base,
+                                   target_phys_addr_t initrd_size,
+                                   target_phys_addr_t kernel_size,
+                                   const char *boot_device,
+                                   const char *kernel_cmdline)
+{
+    void *fdt;
+    CPUPPCState *env;
+    uint32_t start_prop = cpu_to_be32(initrd_base);
+    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
+    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
+        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
+    char qemu_hypertas_prop[] = "hcall-memop1";
+    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
+    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
+    char *modelname;
+    int i, smt = kvmppc_smt_threads();
+    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
+
    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

@ -288,55 +290,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model,

    _FDT((fdt_end_node(fdt)));

-    /* memory node(s) */
-    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
-    if (rma_size > node0_size) {
-        rma_size = node0_size;
-    }
-
-    /* RMA */
-    mem_reg_property[0] = 0;
-    mem_reg_property[1] = cpu_to_be64(rma_size);
-    _FDT((fdt_begin_node(fdt, "memory@0")));
-    _FDT((fdt_property_string(fdt, "device_type", "memory")));
-    _FDT((fdt_property(fdt, "reg", mem_reg_property,
-        sizeof(mem_reg_property))));
-    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-        sizeof(associativity))));
-    _FDT((fdt_end_node(fdt)));
-
-    /* RAM: Node 0 */
-    if (node0_size > rma_size) {
-        mem_reg_property[0] = cpu_to_be64(rma_size);
-        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);
-
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
-        _FDT((fdt_begin_node(fdt, mem_name)));
-        _FDT((fdt_property_string(fdt, "device_type", "memory")));
-        _FDT((fdt_property(fdt, "reg", mem_reg_property,
-                           sizeof(mem_reg_property))));
-        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-                           sizeof(associativity))));
-        _FDT((fdt_end_node(fdt)));
-    }
-
-    /* RAM: Node 1 and beyond */
-    mem_start = node0_size;
-    for (i = 1; i < nb_numa_nodes; i++) {
-        mem_reg_property[0] = cpu_to_be64(mem_start);
-        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
-        associativity[3] = associativity[4] = cpu_to_be32(i);
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
-        _FDT((fdt_begin_node(fdt, mem_name)));
-        _FDT((fdt_property_string(fdt, "device_type", "memory")));
-        _FDT((fdt_property(fdt, "reg", mem_reg_property,
-            sizeof(mem_reg_property))));
-        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-            sizeof(associativity))));
-        _FDT((fdt_end_node(fdt)));
-        mem_start += node_mem[i];
-    }
-
    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

@ -388,8 +341,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
-        _FDT((fdt_property(fdt, "ibm,pft-size",
-                           pft_size_prop, sizeof(pft_size_prop))));
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

@ -488,6 +439,68 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
    return fdt;
 }

+static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+{
+    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
+                                cpu_to_be32(0x0), cpu_to_be32(0x0),
+                                cpu_to_be32(0x0)};
+    char mem_name[32];
+    target_phys_addr_t node0_size, mem_start;
+    uint64_t mem_reg_property[2];
+    int i, off;
+
+    /* memory node(s) */
+    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+    if (spapr->rma_size > node0_size) {
+        spapr->rma_size = node0_size;
+    }
+
+    /* RMA */
+    mem_reg_property[0] = 0;
+    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
+    off = fdt_add_subnode(fdt, 0, "memory@0");
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                      sizeof(associativity))));
+
+    /* RAM: Node 0 */
+    if (node0_size > spapr->rma_size) {
+        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
+        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
+
+        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                          sizeof(mem_reg_property))));
+        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                          sizeof(associativity))));
+    }
+
+    /* RAM: Node 1 and beyond */
+    mem_start = node0_size;
+    for (i = 1; i < nb_numa_nodes; i++) {
+        mem_reg_property[0] = cpu_to_be64(mem_start);
+        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
+        associativity[3] = associativity[4] = cpu_to_be32(i);
+        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                          sizeof(mem_reg_property))));
+        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                          sizeof(associativity))));
+        mem_start += node_mem[i];
+    }
+
+    return 0;
+}
+
 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                               target_phys_addr_t fdt_addr,
                               target_phys_addr_t rtas_addr,
@ -502,6 +515,12 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

+    ret = spapr_populate_memory(spapr, fdt);
+    if (ret < 0) {
+        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
+        exit(1);
+    }
+
    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
@ -524,11 +543,9 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
    }

    /* Advertise NUMA via ibm,associativity */
-    if (nb_numa_nodes > 1) {
-        ret = spapr_set_associativity(fdt, spapr);
-        if (ret < 0) {
-            fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
-        }
+    ret = spapr_fixup_cpu_dt(fdt, spapr);
+    if (ret < 0) {
+        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
    }

    if (!spapr->has_graphics) {
@ -555,15 +572,49 @@ static uint64_t translate_kernel_address(void *opaque, uint64_t addr)

 static void emulate_spapr_hypercall(CPUPPCState *env)
 {
-    env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
+    if (msr_pr) {
+        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
+        env->gpr[3] = H_PRIVILEGE;
+    } else {
+        env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
+    }
 }

-static void spapr_reset(void *opaque)
+static void spapr_reset_htab(sPAPREnvironment *spapr)
 {
-    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
+    long shift;

-    /* flush out the hash table */
-    memset(spapr->htab, 0, spapr->htab_size);
+    /* allocate hash page table.  For now we always make this 16mb,
+     * later we should probably make it scale to the size of guest
+     * RAM */
+
+    shift = kvmppc_reset_htab(spapr->htab_shift);
+
+    if (shift > 0) {
+        /* Kernel handles htab, we don't need to allocate one */
+        spapr->htab_shift = shift;
+    } else {
+        if (!spapr->htab) {
+            /* Allocate an htab if we don't yet have one */
+            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
+        }
+
+        /* And clear it */
+        memset(spapr->htab, 0, HTAB_SIZE(spapr));
+    }
+
+    /* Update the RMA size if necessary */
+    if (spapr->vrma_adjust) {
+        spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
+    }
+}
+
+static void ppc_spapr_reset(void)
+{
+    /* Reset the hash table & recalc the RMA */
+    spapr_reset_htab(spapr);
+
+    qemu_devices_reset();

    /* Load the fdt */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
@ -580,8 +631,22 @@ static void spapr_reset(void *opaque)
 static void spapr_cpu_reset(void *opaque)
 {
    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;

    cpu_reset(CPU(cpu));
+
+    /* All CPUs start halted.  CPU0 is unhalted from the machine level
+     * reset code and the rest are explicitly started up by the guest
+     * using an RTAS call */
+    env->halted = 1;
+
+    env->spr[SPR_HIOR] = 0;
+
+    env->external_htab = spapr->htab;
+    env->htab_base = -1;
+    env->htab_mask = HTAB_SIZE(spapr) - 1;
+    env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
+        (spapr->htab_shift - 18);
 }

 /* Returns whether we want to use VGA or not */
@ -613,11 +678,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
    int i;
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
-    target_phys_addr_t rma_alloc_size, rma_size;
+    target_phys_addr_t rma_alloc_size;
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, rtas_limit, fw_size;
-    long pteg_shift = 17;
    char *filename;

    msi_supported = true;
@ -634,20 +698,46 @@ static void ppc_spapr_init(ram_addr_t ram_size,
        hw_error("qemu: Unable to create RMA\n");
        exit(1);
    }
+
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
-        rma_size = rma_alloc_size;
+        spapr->rma_size = rma_alloc_size;
    } else {
-        rma_size = ram_size;
+        spapr->rma_size = ram_size;
+
+        /* With KVM, we don't actually know whether KVM supports an
+         * unbounded RMA (PR KVM) or is limited by the hash table size
+         * (HV KVM using VRMA), so we always assume the latter
+         *
+         * In that case, we also limit the initial allocations for RTAS
+         * etc... to 256M since we have no way to know what the VRMA size
+         * is going to be as it depends on the size of the hash table
+         * isn't determined yet.
+         */
+        if (kvm_enabled()) {
+            spapr->vrma_adjust = 1;
+            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
+        }
    }

    /* We place the device tree and RTAS just below either the top of the RMA,
     * or just below 2GB, whichever is lowere, so that it can be
     * processed with 32-bit real mode code if necessary */
-    rtas_limit = MIN(rma_size, 0x80000000);
+    rtas_limit = MIN(spapr->rma_size, 0x80000000);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
    load_limit = spapr->fdt_addr - FW_OVERHEAD;

+    /* We aim for a hash table of size 1/128 the size of RAM.  The
+     * normal rule of thumb is 1/64 the size of RAM, but that's much
+     * more than needed for the Linux guests we support. */
+    spapr->htab_shift = 18; /* Minimum architected size */
+    while (spapr->htab_shift <= 46) {
+        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
+            break;
+        }
+        spapr->htab_shift++;
+    }
+
    /* init CPUs */
    if (cpu_model == NULL) {
        cpu_model = kvm_enabled() ? "host" : "POWER7";
@ -662,11 +752,16 @@ static void ppc_spapr_init(ram_addr_t ram_size,

        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
-        qemu_register_reset(spapr_cpu_reset, cpu);

-        env->hreset_vector = 0x60;
+        /* PAPR always has exception vectors in RAM not ROM */
        env->hreset_excp_prefix = 0;
-        env->gpr[3] = env->cpu_index;
+
+        /* Tell KVM that we're in PAPR mode */
+        if (kvm_enabled()) {
+            kvmppc_set_papr(env);
+        }
+
+        qemu_register_reset(spapr_cpu_reset, cpu);
    }

    /* allocate RAM */
@ -680,27 +775,6 @@ static void ppc_spapr_init(ram_addr_t ram_size,
        memory_region_add_subregion(sysmem, nonrma_base, ram);
    }

-    /* allocate hash page table.  For now we always make this 16mb,
-     * later we should probably make it scale to the size of guest
-     * RAM */
-    spapr->htab_size = 1ULL << (pteg_shift + 7);
-    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->external_htab = spapr->htab;
-        env->htab_base = -1;
-        env->htab_mask = spapr->htab_size - 1;
-
-        /* Tell KVM that we're in PAPR mode */
-        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
-                             ((pteg_shift + 7) - 18);
-        env->spr[SPR_HIOR] = 0;
-
-        if (kvm_enabled()) {
-            kvmppc_set_papr(env);
-        }
-    }
-
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
                                           rtas_limit - spapr->rtas_addr);
@ -773,7 +847,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
        }
    }

-    if (rma_size < (MIN_RMA_SLOF << 20)) {
+    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
@ -824,26 +898,19 @@ static void ppc_spapr_init(ram_addr_t ram_size,

    spapr->entry_point = 0x100;

-    /* SLOF will startup the secondary CPUs using RTAS */
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->halted = 1;
-    }
-
    /* Prepare the device tree */
-    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
+    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
                                            initrd_base, initrd_size,
                                            kernel_size,
-                                            boot_device, kernel_cmdline,
-                                            pteg_shift + 7);
+                                            boot_device, kernel_cmdline);
    assert(spapr->fdt_skel != NULL);
-
-    qemu_register_reset(spapr_reset, spapr);
 }

 static QEMUMachine spapr_machine = {
    .name = "pseries",
    .desc = "pSeries Logical Partition (PAPR compliant)",
    .init = ppc_spapr_init,
+    .reset = ppc_spapr_reset,
    .max_cpus = MAX_CPUS,
    .no_parallel = 1,
    .use_scsi = 1,
--- a/hw/spapr.h
+++ b/hw/spapr.h
@ -15,7 +15,9 @@ typedef struct sPAPREnvironment {

    target_phys_addr_t ram_limit;
    void *htab;
-    long htab_size;
+    long htab_shift;
+    target_phys_addr_t rma_size;
+    int vrma_adjust;
    target_phys_addr_t fdt_addr, rtas_addr;
    long rtas_size;
    void *fdt_skel;
@ -289,17 +291,17 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
 target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode,
                             target_ulong *args);

-int spapr_allocate_irq(int hint, enum xics_irq_type type);
-int spapr_allocate_irq_block(int num, enum xics_irq_type type);
+int spapr_allocate_irq(int hint, bool lsi);
+int spapr_allocate_irq_block(int num, bool lsi);

 static inline int spapr_allocate_msi(int hint)
 {
-    return spapr_allocate_irq(hint, XICS_MSI);
+    return spapr_allocate_irq(hint, false);
 }

 static inline int spapr_allocate_lsi(int hint)
 {
-    return spapr_allocate_irq(hint, XICS_LSI);
+    return spapr_allocate_irq(hint, true);
 }

 static inline uint32_t rtas_ld(target_ulong phys, int n)
@ -336,6 +338,8 @@ typedef struct sPAPRTCE {
 void spapr_iommu_init(void);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
+void spapr_tce_reset(DMAContext *dma);
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                 uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@ -39,22 +39,6 @@
 #define HPTE_V_1TB_SEG          0x4000000000000000ULL
 #define HPTE_V_VRMA_MASK        0x4001ffffff000000ULL

-#define HPTE_V_HVLOCK           0x40ULL
-
-static inline int lock_hpte(void *hpte, target_ulong bits)
-{
-    uint64_t pteh;
-
-    pteh = ldq_p(hpte);
-
-    /* We're protected by qemu's global lock here */
-    if (pteh & bits) {
-        return 0;
-    }
-    stq_p(hpte, pteh | HPTE_V_HVLOCK);
-    return 1;
-}
-
 static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
                                     target_ulong pte_index)
 {
@ -151,8 +135,7 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr,
            if (i == 8) {
                return H_PTEG_FULL;
            }
-            if (((ldq_p(hpte) & HPTE_V_VALID) == 0) &&
-                lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
+            if ((ldq_p(hpte) & HPTE_V_VALID) == 0) {
                break;
            }
            hpte += HASH_PTE_SIZE_64;
@ -160,7 +143,7 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr,
    } else {
        i = 0;
        hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
-        if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
+        if (ldq_p(hpte) & HPTE_V_VALID) {
            return H_PTEG_FULL;
        }
    }
@ -168,7 +151,6 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr,
    /* eieio();  FIXME: need some sort of barrier for smp? */
    stq_p(hpte, pteh);

-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
    args[0] = pte_index + i;
    return H_SUCCESS;
 }
@ -193,11 +175,6 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex,
    }

    hpte = env->external_htab + (ptex * HASH_PTE_SIZE_64);
-    while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
-        /* We have no real concurrency in qemu soft-emulation, so we
-         * will never actually have a contested lock */
-        assert(0);
-    }

    v = ldq_p(hpte);
    r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
@ -205,16 +182,13 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex,
    if ((v & HPTE_V_VALID) == 0 ||
        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
        ((flags & H_ANDCOND) && (v & avpn) != 0)) {
-        stq_p(hpte, v & ~HPTE_V_HVLOCK);
-        assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
        return REMOVE_NOT_FOUND;
    }
-    *vp = v & ~HPTE_V_HVLOCK;
+    *vp = v;
    *rp = r;
    stq_p(hpte, 0);
    rb = compute_tlbie_rb(v, r, ptex);
    ppc_tlb_invalidate_one(env, rb);
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
    return REMOVE_SUCCESS;
 }

@ -324,19 +298,12 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr,
    }

    hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
-    while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
-        /* We have no real concurrency in qemu soft-emulation, so we
-         * will never actually have a contested lock */
-        assert(0);
-    }

    v = ldq_p(hpte);
    r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));

    if ((v & HPTE_V_VALID) == 0 ||
        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
-        stq_p(hpte, v & ~HPTE_V_HVLOCK);
-        assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
        return H_NOT_FOUND;
    }

@ -350,8 +317,7 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr,
    ppc_tlb_invalidate_one(env, rb);
    stq_p(hpte + (HASH_PTE_SIZE_64/2), r);
    /* Don't need a memory barrier, due to qemu's global lock */
-    stq_p(hpte, v & ~HPTE_V_HVLOCK);
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
+    stq_p(hpte, v);
    return H_SUCCESS;
 }

@ -544,6 +510,8 @@ static target_ulong h_cede(CPUPPCState *env, sPAPREnvironment *spapr,
    hreg_compute_hflags(env);
    if (!cpu_has_work(env)) {
        env->halted = 1;
+        env->exception_index = EXCP_HLT;
+        env->exit_request = 1;
    }
    return H_SUCCESS;
 }
@ -713,11 +681,6 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
 target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode,
                             target_ulong *args)
 {
-    if (msr_pr) {
-        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
-        return H_PRIVILEGE;
-    }
-
    if ((opcode <= MAX_HCALL_OPCODE)
        && ((opcode & 0x3) == 0)) {
        spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@ -42,6 +42,7 @@ struct sPAPRTCETable {
    uint32_t liobn;
    uint32_t window_size;
    sPAPRTCE *table;
+    bool bypass;
    int fd;
    QLIST_ENTRY(sPAPRTCETable) list;
 };
@ -78,6 +79,12 @@ static int spapr_tce_translate(DMAContext *dma,
            DMA_ADDR_FMT "\n", tcet->liobn, addr);
 #endif

+    if (tcet->bypass) {
+        *paddr = addr;
+        *len = (target_phys_addr_t)-1;
+        return 0;
+    }
+
    /* Check if we are in bound */
    if (addr >= tcet->window_size) {
 #ifdef DEBUG_TCE
@ -162,6 +169,23 @@ void spapr_tce_free(DMAContext *dma)
    }
 }

+void spapr_tce_set_bypass(DMAContext *dma, bool bypass)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+
+    tcet->bypass = bypass;
+}
+
+void spapr_tce_reset(DMAContext *dma)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
+        * sizeof(sPAPRTCE);
+
+    tcet->bypass = false;
+    memset(tcet->table, 0, table_size);
+}
+
 static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
                                target_ulong tce)
 {
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(sPAPREnvironment *spapr,

    /* There is no cached config, allocate MSIs */
    if (!phb->msi_table[ndev].nvec) {
-        irq = spapr_allocate_irq_block(req_num, XICS_MSI);
+        irq = spapr_allocate_irq_block(req_num, true);
        if (irq < 0) {
            fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
            rtas_st(rets, 0, -1); /* Hardware error */
@ -595,6 +595,15 @@ static int spapr_phb_init(SysBusDevice *s)
    return 0;
 }

+static void spapr_phb_reset(DeviceState *qdev)
+{
+    SysBusDevice *s = sysbus_from_qdev(qdev);
+    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
+
+    /* Reset the IOMMU state */
+    spapr_tce_reset(sphb->dma);
+}
+
 static Property spapr_phb_properties[] = {
    DEFINE_PROP_HEX64("buid", sPAPRPHBState, buid, 0),
    DEFINE_PROP_STRING("busname", sPAPRPHBState, busname),
@ -613,6 +622,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)

    sdc->init = spapr_phb_init;
    dc->props = spapr_phb_properties;
+    dc->reset = spapr_phb_reset;
 }

 static const TypeInfo spapr_phb_info = {
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@ -184,6 +184,11 @@ static void rtas_start_cpu(sPAPREnvironment *spapr,
            return;
        }

+        /* This will make sure qemu state is up to date with kvm, and
+         * mark it dirty so our changes get flushed back before the
+         * new cpu enters */
+        kvm_cpu_synchronize_state(env);
+
        env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME);
        env->nip = start;
        env->gpr[3] = r3;
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@ -316,17 +316,10 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)

 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-
    if (dev->dma) {
-        spapr_tce_free(dev->dma);
+        spapr_tce_reset(dev->dma);
    }
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
-
-    dev->crq.qladdr = 0;
-    dev->crq.qsize = 0;
-    dev->crq.qnext = 0;
+    free_crq(dev);
 }

 static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
@ -348,16 +341,14 @@ static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
        rtas_st(rets, 0, -3);
        return;
    }
-    if (enable) {
-        spapr_tce_free(dev->dma);
-        dev->dma = NULL;
-    } else {
-        VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;

-        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (!dev->dma) {
+        rtas_st(rets, 0, -3);
+        return;
    }

+    spapr_tce_set_bypass(dev->dma, !!enable);
+
    rtas_st(rets, 0, 0);
 }

@ -409,9 +400,10 @@ static void spapr_vio_busdev_reset(DeviceState *qdev)
    VIOsPAPRDevice *dev = DO_UPCAST(VIOsPAPRDevice, qdev, qdev);
    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);

-    if (dev->crq.qsize) {
-        free_crq(dev);
-    }
+    /* Shut down the request queue and TCEs if necessary */
+    spapr_vio_quiesce_one(dev);
+
+    dev->signal_state = 0;

    if (pc->reset) {
        pc->reset(dev);
@ -422,7 +414,6 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 {
    VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn;
    char *id;

    if (dev->reg != -1) {
@ -464,8 +455,10 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
        return -1;
    }

-    liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (pc->rtce_window_size) {
+        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    }

    return pc->init(dev);
 }
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@ -60,7 +60,6 @@ typedef struct VIOsPAPRDeviceClass {
 struct VIOsPAPRDevice {
    DeviceState qdev;
    uint32_t reg;
-    uint32_t flags;
    uint32_t irq;
    target_ulong signal_state;
    VIOsPAPR_CRQ crq;
@ -132,7 +131,6 @@ void spapr_vscsi_create(VIOsPAPRBus *bus);

 VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus);

-int spapr_tce_set_bypass(uint32_t unit, uint32_t enable);
 void spapr_vio_quiesce(void);

 #endif /* _HW_SPAPR_VIO_H */
--- a/hw/xics.c
+++ b/hw/xics.c
@ -165,11 +165,12 @@ struct ics_irq_state {
    int server;
    uint8_t priority;
    uint8_t saved_priority;
-    enum xics_irq_type type;
-    int asserted:1;
-    int sent:1;
-    int rejected:1;
-    int masked_pending:1;
+#define XICS_STATUS_ASSERTED           0x1
+#define XICS_STATUS_SENT               0x2
+#define XICS_STATUS_REJECTED           0x4
+#define XICS_STATUS_MASKED_PENDING     0x8
+    uint8_t status;
+    bool lsi;
 };

 struct ics_state {
@ -191,8 +192,8 @@ static void resend_msi(struct ics_state *ics, int srcno)
    struct ics_irq_state *irq = ics->irqs + srcno;

    /* FIXME: filter by server#? */
-    if (irq->rejected) {
-        irq->rejected = 0;
+    if (irq->status & XICS_STATUS_REJECTED) {
+        irq->status &= ~XICS_STATUS_REJECTED;
        if (irq->priority != 0xff) {
            icp_irq(ics->icp, irq->server, srcno + ics->offset,
                    irq->priority);
@ -204,8 +205,10 @@ static void resend_lsi(struct ics_state *ics, int srcno)
 {
    struct ics_irq_state *irq = ics->irqs + srcno;

-    if ((irq->priority != 0xff) && irq->asserted && !irq->sent) {
-        irq->sent = 1;
+    if ((irq->priority != 0xff)
+        && (irq->status & XICS_STATUS_ASSERTED)
+        && !(irq->status & XICS_STATUS_SENT)) {
+        irq->status |= XICS_STATUS_SENT;
        icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
    }
 }
@ -216,7 +219,7 @@ static void set_irq_msi(struct ics_state *ics, int srcno, int val)

    if (val) {
        if (irq->priority == 0xff) {
-            irq->masked_pending = 1;
+            irq->status |= XICS_STATUS_MASKED_PENDING;
            /* masked pending */ ;
        } else  {
            icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
@ -228,7 +231,11 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val)
 {
    struct ics_irq_state *irq = ics->irqs + srcno;

-    irq->asserted = val;
+    if (val) {
+        irq->status |= XICS_STATUS_ASSERTED;
+    } else {
+        irq->status &= ~XICS_STATUS_ASSERTED;
+    }
    resend_lsi(ics, srcno);
 }

@ -237,7 +244,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
    struct ics_state *ics = (struct ics_state *)opaque;
    struct ics_irq_state *irq = ics->irqs + srcno;

-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
        set_irq_lsi(ics, srcno, val);
    } else {
        set_irq_msi(ics, srcno, val);
@ -248,11 +255,12 @@ static void write_xive_msi(struct ics_state *ics, int srcno)
 {
    struct ics_irq_state *irq = ics->irqs + srcno;

-    if (!irq->masked_pending || (irq->priority == 0xff)) {
+    if (!(irq->status & XICS_STATUS_MASKED_PENDING)
+        || (irq->priority == 0xff)) {
        return;
    }

-    irq->masked_pending = 0;
+    irq->status &= ~XICS_STATUS_MASKED_PENDING;
    icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
 }

@ -262,15 +270,16 @@ static void write_xive_lsi(struct ics_state *ics, int srcno)
 }

 static void ics_write_xive(struct ics_state *ics, int nr, int server,
-                           uint8_t priority)
+                           uint8_t priority, uint8_t saved_priority)
 {
    int srcno = nr - ics->offset;
    struct ics_irq_state *irq = ics->irqs + srcno;

    irq->server = server;
    irq->priority = priority;
+    irq->saved_priority = saved_priority;

-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
        write_xive_lsi(ics, srcno);
    } else {
        write_xive_msi(ics, srcno);
@ -281,8 +290,8 @@ static void ics_reject(struct ics_state *ics, int nr)
 {
    struct ics_irq_state *irq = ics->irqs + nr - ics->offset;

-    irq->rejected = 1; /* Irrelevant but harmless for LSI */
-    irq->sent = 0; /* Irrelevant but harmless for MSI */
+    irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */
+    irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
 }

 static void ics_resend(struct ics_state *ics)
@ -293,7 +302,7 @@ static void ics_resend(struct ics_state *ics)
        struct ics_irq_state *irq = ics->irqs + i;

        /* FIXME: filter by server#? */
-        if (irq->type == XICS_LSI) {
+        if (irq->lsi) {
            resend_lsi(ics, i);
        } else {
            resend_msi(ics, i);
@ -306,8 +315,8 @@ static void ics_eoi(struct ics_state *ics, int nr)
    int srcno = nr - ics->offset;
    struct ics_irq_state *irq = ics->irqs + srcno;

-    if (irq->type == XICS_LSI) {
-        irq->sent = 0;
+    if (irq->lsi) {
+        irq->status &= ~XICS_STATUS_SENT;
    }
 }

@ -325,14 +334,12 @@ qemu_irq xics_get_qirq(struct icp_state *icp, int irq)
    return icp->ics->qirqs[irq - icp->ics->offset];
 }

-void xics_set_irq_type(struct icp_state *icp, int irq,
-                       enum xics_irq_type type)
+void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi)
 {
    assert((irq >= icp->ics->offset)
           && (irq < (icp->ics->offset + icp->ics->nr_irqs)));
-    assert((type == XICS_MSI) || (type == XICS_LSI));

-    icp->ics->irqs[irq - icp->ics->offset].type = type;
+    icp->ics->irqs[irq - icp->ics->offset].lsi = lsi;
 }

 static target_ulong h_cppr(CPUPPCState *env, sPAPREnvironment *spapr,
@ -399,7 +406,7 @@ static void rtas_set_xive(sPAPREnvironment *spapr, uint32_t token,
        return;
    }

-    ics_write_xive(ics, nr, server, priority);
+    ics_write_xive(ics, nr, server, priority, priority);

    rtas_st(rets, 0, 0); /* Success */
 }
@ -447,14 +454,8 @@ static void rtas_int_off(sPAPREnvironment *spapr, uint32_t token,
        return;
    }

-    /* This is a NOP for now, since the described PAPR semantics don't
-     * seem to gel with what Linux does */
-#if 0
-    struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
-
-    irq->saved_priority = irq->priority;
-    ics_write_xive_msi(xics, nr, irq->server, 0xff);
-#endif
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
+                   ics->irqs[nr - ics->offset].priority);

    rtas_st(rets, 0, 0); /* Success */
 }
@ -478,22 +479,40 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token,
        return;
    }

-    /* This is a NOP for now, since the described PAPR semantics don't
-     * seem to gel with what Linux does */
-#if 0
-    struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
-
-    ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority);
-#endif
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
+                   ics->irqs[nr - ics->offset].saved_priority,
+                   ics->irqs[nr - ics->offset].saved_priority);

    rtas_st(rets, 0, 0); /* Success */
 }

+static void xics_reset(void *opaque)
+{
+    struct icp_state *icp = (struct icp_state *)opaque;
+    struct ics_state *ics = icp->ics;
+    int i;
+
+    for (i = 0; i < icp->nr_servers; i++) {
+        icp->ss[i].xirr = 0;
+        icp->ss[i].pending_priority = 0;
+        icp->ss[i].mfrr = 0xff;
+        /* Make all outputs are deasserted */
+        qemu_set_irq(icp->ss[i].output, 0);
+    }
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        /* Reset everything *except* the type */
+        ics->irqs[i].server = 0;
+        ics->irqs[i].status = 0;
+        ics->irqs[i].priority = 0xff;
+        ics->irqs[i].saved_priority = 0xff;
+    }
+}
+
 struct icp_state *xics_system_init(int nr_irqs)
 {
    CPUPPCState *env;
    int max_server_num;
-    int i;
    struct icp_state *icp;
    struct ics_state *ics;

@ -508,10 +527,6 @@ struct icp_state *xics_system_init(int nr_irqs)
    icp->nr_servers = max_server_num + 1;
    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));

-    for (i = 0; i < icp->nr_servers; i++) {
-        icp->ss[i].mfrr = 0xff;
-    }
-
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        struct icp_server_state *ss = &icp->ss[env->cpu_index];

@ -539,11 +554,6 @@ struct icp_state *xics_system_init(int nr_irqs)
    icp->ics = ics;
    ics->icp = icp;

-    for (i = 0; i < nr_irqs; i++) {
-        ics->irqs[i].priority = 0xff;
-        ics->irqs[i].saved_priority = 0xff;
-    }
-
    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);

    spapr_register_hypercall(H_CPPR, h_cppr);
@ -556,5 +566,7 @@ struct icp_state *xics_system_init(int nr_irqs)
    spapr_rtas_register("ibm,int-off", rtas_int_off);
    spapr_rtas_register("ibm,int-on", rtas_int_on);

+    qemu_register_reset(xics_reset, icp);
+
    return icp;
 }
--- a/hw/xics.h
+++ b/hw/xics.h
@ -31,14 +31,8 @@

 struct icp_state;

-enum xics_irq_type {
-    XICS_MSI,        /* Message-signalled (edge) interrupt */
-    XICS_LSI,        /* Level-signalled interrupt */
-};
-
 qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
-void xics_set_irq_type(struct icp_state *icp, int irq,
-                       enum xics_irq_type type);
+void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);

 struct icp_state *xics_system_init(int nr_irqs);

--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@ -1079,7 +1079,6 @@ struct CPUPPCState {
    int mmu_idx;         /* precomputed MMU index to speed up mem accesses */

    /* Power management */
-    int power_mode;
    int (*check_pow)(CPUPPCState *env);

 #if !defined(CONFIG_USER_ONLY)
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@ -287,23 +287,6 @@ target_ulong helper_602_mfrom(target_ulong arg)
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
 #endif

-/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
- * execute the following block.  */
-#define DO_HANDLE_NAN(result, x)                        \
-    if (float32_is_any_nan(x)) {                        \
-        CPU_FloatU __f;                                 \
-        __f.f = x;                                      \
-        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
-        result = __f.f;                                 \
-    } else
-
-#define HANDLE_NAN1(result, x)                  \
-    DO_HANDLE_NAN(result, x)
-#define HANDLE_NAN2(result, x, y)                       \
-    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
-#define HANDLE_NAN3(result, x, y, z)                                    \
-    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)
-
 /* Saturating arithmetic helpers.  */
 #define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
@ -409,15 +392,29 @@ VARITH(uwm, u32)
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
-            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
-                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
-            }                                                           \
+            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
 VARITHFP(addfp, float32_add)
 VARITHFP(subfp, float32_sub)
+VARITHFP(minfp, float32_min)
+VARITHFP(maxfp, float32_max)
 #undef VARITHFP

+#define VARITHFPFMA(suffix, type)                                       \
+    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
+                           ppc_avr_t *b, ppc_avr_t *c)                  \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
+            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
+                                     type, &env->vec_status);           \
+        }                                                               \
+    }
+VARITHFPFMA(maddfp, 0);
+VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
+#undef VARITHFPFMA
+
 #define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
@ -649,27 +646,6 @@ VCT(uxs, cvtsduw, u32)
 VCT(sxs, cvtsdsw, s32)
 #undef VCT

-void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
-                    ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation in higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_add(t, bf, &env->vec_status);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
 {
@ -730,27 +706,6 @@ VMINMAX(uw, u32)
 #undef VMINMAX_DO
 #undef VMINMAX

-#define VMINMAXFP(suffix, rT, rF)                                       \
-    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
-                          ppc_avr_t *b)                                 \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
-            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
-                if (float32_lt_quiet(a->f[i], b->f[i],                  \
-                                     &env->vec_status)) {               \
-                    r->f[i] = rT->f[i];                                 \
-                } else {                                                \
-                    r->f[i] = rF->f[i];                                 \
-                }                                                       \
-            }                                                           \
-        }                                                               \
-    }
-VMINMAXFP(minfp, a, b)
-VMINMAXFP(maxfp, b, a)
-#undef VMINMAXFP
-
 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 {
    int i;
@ -930,28 +885,6 @@ VMUL(uh, u16, u32)
 #undef VMUL_DO
 #undef VMUL

-void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
-                     ppc_avr_t *b, ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation is higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_sub(t, bf, &env->vec_status);
-            t = float64_chs(t);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
 {
@ -1039,9 +972,7 @@ void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
 }

@ -1054,9 +985,7 @@ void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
-            HANDLE_NAN1(r->f[i], b->f[i]) {                     \
-                r->f[i] = float32_round_to_int (b->f[i], &s);   \
-            }                                                   \
+            r->f[i] = float32_round_to_int (b->f[i], &s);       \
        }                                                       \
    }
 VRFI(n, float_round_nearest_even)
@ -1089,11 +1018,9 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            float32 t = float32_sqrt(b->f[i], &env->vec_status);
+        float32 t = float32_sqrt(b->f[i], &env->vec_status);

-            r->f[i] = float32_div(float32_one, t, &env->vec_status);
-        }
+        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
 }

@ -1109,9 +1036,7 @@ void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_exp2(b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
 }

@ -1120,9 +1045,7 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_log2(b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
 }

@ -1473,10 +1396,6 @@ VUPK(lsh, s32, s16, UPKLO)
 #undef UPKHI
 #undef UPKLO

-#undef DO_HANDLE_NAN
-#undef HANDLE_NAN1
-#undef HANDLE_NAN2
-#undef HANDLE_NAN3
 #undef VECTOR_FOR_INORDER_I
 #undef HI_IDX
 #undef LO_IDX
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@ -60,6 +60,7 @@ static int cap_booke_sregs;
 static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
+static int cap_hior;

 /* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
@ -86,6 +87,7 @@ int kvm_arch_init(KVMState *s)
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
+    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@ -469,6 +471,54 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
        env->tlb_dirty = false;
    }

+    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
+        struct kvm_sregs sregs;
+
+        sregs.pvr = env->spr[SPR_PVR];
+
+        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
+
+        /* Sync SLB */
+#ifdef TARGET_PPC64
+        for (i = 0; i < 64; i++) {
+            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
+            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
+        }
+#endif
+
+        /* Sync SRs */
+        for (i = 0; i < 16; i++) {
+            sregs.u.s.ppc32.sr[i] = env->sr[i];
+        }
+
+        /* Sync BATs */
+        for (i = 0; i < 8; i++) {
+            /* Beware. We have to swap upper and lower bits here */
+            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
+                | env->DBAT[1][i];
+            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
+                | env->IBAT[1][i];
+        }
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
+        uint64_t hior = env->spr[SPR_HIOR];
+        struct kvm_one_reg reg = {
+            .id = KVM_REG_PPC_HIOR,
+            .addr = (uintptr_t) &hior,
+        };
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
    return ret;
 }

@ -946,52 +996,14 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 void kvmppc_set_papr(CPUPPCState *env)
 {
    struct kvm_enable_cap cap = {};
-    struct kvm_one_reg reg = {};
-    struct kvm_sregs sregs = {};
    int ret;
-    uint64_t hior = env->spr[SPR_HIOR];

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
-        goto fail;
+        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
-
-    /*
-     * XXX We set HIOR here. It really should be a qdev property of
-     *     the CPU node, but we don't have CPUs converted to qdev yet.
-     *
-     *     Once we have qdev CPUs, move HIOR to a qdev property and
-     *     remove this chunk.
-     */
-    reg.id = KVM_REG_PPC_HIOR;
-    reg.addr = (uintptr_t)&hior;
-    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n"
-                        "kernel with support for HV KVM but no PAPR PR \n"
-                        "KVM in which case things will work. If they don't \n"
-                        "please update your host kernel!\n");
-    }
-
-    /* Set SDR1 so kernel space finds the HTAB */
-    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
-
-    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    return;
-
-fail:
-    cpu_abort(env, "This KVM version does not support PAPR\n");
 }

 int kvmppc_smt_threads(void)
@ -999,6 +1011,7 @@ int kvmppc_smt_threads(void)
    return cap_ppc_smt ? cap_ppc_smt : 1;
 }

+#ifdef TARGET_PPC64
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 {
    void *rma;
@ -1042,6 +1055,16 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
    return size;
 }

+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
+{
+    if (cap_ppc_rma >= 2) {
+        return current_size;
+    }
+    return MIN(current_size,
+               getrampagesize() << (hash_shift - 7));
+}
+#endif
+
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
 {
    struct kvm_create_spapr_tce args = {
@ -1101,6 +1124,44 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
    return 0;
 }

+int kvmppc_reset_htab(int shift_hint)
+{
+    uint32_t shift = shift_hint;
+
+    if (!kvm_enabled()) {
+        /* Full emulation, tell caller to allocate htab itself */
+        return 0;
+    }
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
+        int ret;
+        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
+        if (ret == -ENOTTY) {
+            /* At least some versions of PR KVM advertise the
+             * capability, but don't implement the ioctl().  Oops.
+             * Return 0 so that we allocate the htab in qemu, as is
+             * correct for PR. */
+            return 0;
+        } else if (ret < 0) {
+            return ret;
+        }
+        return shift;
+    }
+
+    /* We have a kernel that predates the htab reset calls.  For PR
+     * KVM, we need to allocate the htab ourselves, for an HV KVM of
+     * this era, it has allocated a 16MB fixed size hash table
+     * already.  Kernels of this era have the GET_PVINFO capability
+     * only on PR, so we use this hack to determine the right
+     * answer */
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* PR - tell caller to allocate htab */
+        return 0;
+    } else {
+        /* HV - assume 16MB kernel allocated htab */
+        return 24;
+    }
+}
+
 static inline uint32_t mfpvr(void)
 {
    uint32_t pvr;
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@ -27,6 +27,8 @@ int kvmppc_smt_threads(void);
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
+int kvmppc_reset_htab(int shift_hint);
+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 #endif /* !CONFIG_USER_ONLY */
 const ppc_def_t *kvmppc_host_cpu_def(void);
 int kvmppc_fixup_cpu(CPUPPCState *env);
@ -94,6 +96,23 @@ static inline int kvmppc_remove_spapr_tce(void *table, int pfd,
 {
    return -1;
 }
+
+static inline int kvmppc_reset_htab(int shift_hint)
+{
+    return -1;
+}
+
+static inline uint64_t kvmppc_rma_size(uint64_t current_size,
+                                       unsigned int hash_shift)
+{
+    return ram_size;
+}
+
+static inline int kvmppc_update_sdr1(CPUPPCState *env)
+{
+    return 0;
+}
+
 #endif /* !CONFIG_USER_ONLY */

 static inline const ppc_def_t *kvmppc_host_cpu_def(void)
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@ -82,7 +82,7 @@ void cpu_save(QEMUFile *f, void *opaque)
    qemu_put_betls(f, &env->hflags);
    qemu_put_betls(f, &env->hflags_nmsr);
    qemu_put_sbe32s(f, &env->mmu_idx);
-    qemu_put_sbe32s(f, &env->power_mode);
+    qemu_put_sbe32(f, 0);
 }

 int cpu_load(QEMUFile *f, void *opaque, int version_id)
@ -167,7 +167,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
    qemu_get_betls(f, &env->hflags);
    qemu_get_betls(f, &env->hflags_nmsr);
    qemu_get_sbe32s(f, &env->mmu_idx);
-    qemu_get_sbe32s(f, &env->power_mode);
+    qemu_get_sbe32(f); /* Discard unused power_mode */

    return 0;
 }
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@ -10423,6 +10423,14 @@ static void ppc_cpu_reset(CPUState *s)
    env->pending_interrupts = 0;
    env->exception_index = POWERPC_EXCP_NONE;
    env->error_code = 0;
+
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+    env->vpa = 0;
+    env->slb_shadow = 0;
+    env->dispatch_trace_log = 0;
+    env->dtl_size = 0;
+#endif /* TARGET_PPC64 */
+
    /* Flush all TLBs */
    tlb_flush(env, 1);
 }