diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index e536b3ec26..9b10e90519 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -84,28 +84,6 @@ static uint8_t pnv_xive_block_id(PnvXive *xive) return blk; } -/* - * Remote access to controllers. HW uses MMIOs. For now, a simple scan - * of the chips is good enough. - * - * TODO: Block scope support - */ -static PnvXive *pnv_xive_get_remote(uint8_t blk) -{ - PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); - int i; - - for (i = 0; i < pnv->num_chips; i++) { - Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]); - PnvXive *xive = &chip9->xive; - - if (pnv_xive_block_id(xive) == blk) { - return xive; - } - } - return NULL; -} - /* * VST accessors for SBE, EAT, ENDT, NVT * @@ -209,6 +187,42 @@ static uint64_t pnv_xive_vst_addr_indirect(PnvXive *xive, uint32_t type, return pnv_xive_vst_addr_direct(xive, type, vsd, (idx % vst_per_page)); } +/* + * This is a simplified model of operation forwarding on a remote IC. + * + * A PC MMIO address is built to identify the NVT structure. The load + * on the remote IC will return the address of the structure in RAM, + * which will then be used by pnv_xive_vst_write/read to perform the + * RAM operation. + */ +static uint64_t pnv_xive_vst_addr_remote(PnvXive *xive, uint32_t type, + uint64_t vsd, uint8_t blk, + uint32_t idx) +{ + const XiveVstInfo *info = &vst_infos[type]; + uint64_t remote_addr = vsd & VSD_ADDRESS_MASK; + uint64_t vst_addr; + MemTxResult result; + + if (type != VST_TSEL_VPDT) { + xive_error(xive, "VST: invalid access on remote VST %s %x/%x !?", + info->name, blk, idx); + return 0; + } + + remote_addr |= idx << xive->pc_shift; + + vst_addr = address_space_ldq_be(&address_space_memory, remote_addr, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx + " for NVT %x/%x\n", remote_addr, blk, idx); + return 0; + } + + return vst_addr; +} + static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk, uint32_t idx) { @@ -225,9 +239,7 @@ static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk, /* Remote VST access */ if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { - xive = pnv_xive_get_remote(blk); - - return xive ? 
pnv_xive_vst_addr(xive, type, blk, idx) : 0;
+        return pnv_xive_vst_addr_remote(xive, type, vsd, blk, idx);
     }
 
     if (VSD_INDIRECT & vsd) {
@@ -242,12 +254,20 @@ static int pnv_xive_vst_read(PnvXive *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
-    cpu_physical_memory_read(addr, data, info->size);
+    result = address_space_read(&address_space_memory, addr,
+                                MEMTXATTRS_UNSPECIFIED, data,
+                                info->size);
+    if (result != MEMTX_OK) {
+        xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+                   " for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
+    }
 
     return 0;
 }
@@ -258,16 +278,27 @@ static int pnv_xive_vst_write(PnvXive *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
     if (word_number == XIVE_VST_WORD_ALL) {
-        cpu_physical_memory_write(addr, data, info->size);
+        result = address_space_write(&address_space_memory, addr,
+                                     MEMTXATTRS_UNSPECIFIED, data,
+                                     info->size);
     } else {
-        cpu_physical_memory_write(addr + word_number * 4,
-                                  data + word_number * 4, 4);
+        result = address_space_write(&address_space_memory,
+                                     addr + word_number * 4,
+                                     MEMTXATTRS_UNSPECIFIED,
+                                     data + word_number * 4, 4);
+    }
+
+    if (result != MEMTX_OK) {
+        xive_error(xive, "VST: write failed at @0x%" HWADDR_PRIx
+                   " for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
     }
     return 0;
 }
@@ -275,12 +306,26 @@ static int pnv_xive_vst_write(PnvXive *xive, uint32_t type, uint8_t blk,
 static int pnv_xive_get_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
                             XiveEND *end)
 {
+    PnvXive *xive = PNV_XIVE(xrtr);
+
+    if (pnv_xive_block_id(xive) != blk) {
+        xive_error(xive, "VST: END %x/%x is remote !?", blk, idx);
+        return -1;
+    }
+
     return pnv_xive_vst_read(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end);
 }
 
 static int pnv_xive_write_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
                               XiveEND *end, uint8_t word_number)
 {
+    PnvXive *xive = PNV_XIVE(xrtr);
+
+    if (pnv_xive_block_id(xive) != blk) {
+        xive_error(xive, "VST: END %x/%x is remote !?", blk, idx);
+        return -1;
+    }
+
     return pnv_xive_vst_write(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end,
                               word_number);
 }
@@ -1349,6 +1394,50 @@ static const MemoryRegionOps pnv_xive_ic_reg_ops = {
 #define PNV_XIVE_SYNC_PUSH      0xf00 /* Sync push context */
 #define PNV_XIVE_SYNC_VPC       0xf80 /* Sync remove VPC store */
 
+static void pnv_xive_end_notify(XiveRouter *xrtr, XiveEAS *eas)
+{
+    PnvXive *xive = PNV_XIVE(xrtr);
+    uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w);
+    uint64_t end_vsd = xive->vsds[VST_TSEL_EQDT][end_blk];
+
+    switch (GETFIELD(VSD_MODE, end_vsd)) {
+    case VSD_MODE_EXCLUSIVE:
+        /* Perform the END notification on the local IC. */
+        xive_router_end_notify(xrtr, eas);
+        break;
+
+    case VSD_MODE_FORWARD: {
+        MemTxResult result;
+        uint64_t notif_port = end_vsd & VSD_ADDRESS_MASK;
+        uint64_t data = XIVE_TRIGGER_END | XIVE_TRIGGER_PQ |
+                        be64_to_cpu(eas->w);
+
+        /* Forward the store on the remote IC notify page.
*/ + address_space_stq_be(&address_space_memory, notif_port, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + xive_error(xive, "IC: Forward notif END %x/%x [%x] failed @%" + HWADDR_PRIx, end_blk, end_idx, end_data, notif_port); + return; + } + break; + } + + case VSD_MODE_INVALID: + default: + /* Set FIR */ + xive_error(xive, "IC: Invalid END VSD for block %x", end_blk); + return; + } +} + +/* + * The notify page can either be used to receive trigger events from + * the HW controllers (PHB, PSI) or to reroute interrupts between + * Interrupt controllers. + */ static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val) { uint8_t blk; @@ -1357,8 +1446,8 @@ static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val) trace_pnv_xive_ic_hw_trigger(addr, val); if (val & XIVE_TRIGGER_END) { - xive_error(xive, "IC: END trigger at @0x%"HWADDR_PRIx" data 0x%"PRIx64, - addr, val); + val = cpu_to_be64(val); + pnv_xive_end_notify(XIVE_ROUTER(xive), (XiveEAS *) &val); return; } @@ -1703,16 +1792,20 @@ static const MemoryRegionOps pnv_xive_vc_ops = { }; /* - * Presenter Controller MMIO region. The Virtualization Controller - * updates the IPB in the NVT table when required. Not modeled. + * Presenter Controller MMIO region. Points to the NVT sets. + * + * HW implements all possible mem ops to the underlying NVT structure + * but QEMU does not need to be so precise. The model implementation + * simply returns the RAM address of the NVT structure which is then + * used by pnv_xive_vst_write/read to perform the RAM operation. */ -static uint64_t pnv_xive_pc_read(void *opaque, hwaddr addr, - unsigned size) +static uint64_t pnv_xive_pc_read(void *opaque, hwaddr offset, unsigned size) { PnvXive *xive = PNV_XIVE(opaque); + uint32_t nvt_idx = offset >> xive->pc_shift; + uint8_t blk = pnv_xive_block_id(xive); /* TODO: VDT -> block xlate */ - xive_error(xive, "PC: invalid read @%"HWADDR_PRIx, addr); - return -1; + return pnv_xive_vst_addr(xive, VST_TSEL_VPDT, blk, nvt_idx); } static void pnv_xive_pc_write(void *opaque, hwaddr addr, @@ -1898,6 +1991,7 @@ static void pnv_xive_realize(DeviceState *dev, Error **errp) memory_region_init_io(&xive->ic_notify_mmio, OBJECT(dev), &pnv_xive_ic_notify_ops, xive, "xive-ic-notify", 1 << xive->ic_shift); + xive->ic_notify_mmio.disable_reentrancy_guard = true; /* The Pervasive LSI trigger and EOI pages (not modeled) */ memory_region_init_io(&xive->ic_lsi_mmio, OBJECT(dev), &pnv_xive_ic_lsi_ops, @@ -1933,6 +2027,7 @@ static void pnv_xive_realize(DeviceState *dev, Error **errp) /* Presenter Controller MMIO region (not modeled) */ memory_region_init_io(&xive->pc_mmio, OBJECT(xive), &pnv_xive_pc_ops, xive, "xive-pc", PNV9_XIVE_PC_SIZE); + xive->pc_mmio.disable_reentrancy_guard = true; /* Thread Interrupt Management Area (Direct) */ memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &pnv_xive_tm_ops, @@ -1998,6 +2093,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data) xrc->get_nvt = pnv_xive_get_nvt; xrc->write_nvt = pnv_xive_write_nvt; xrc->get_block_id = pnv_xive_get_block_id; + xrc->end_notify = pnv_xive_end_notify; xnc->notify = pnv_xive_notify; xpc->match_nvt = pnv_xive_match_nvt; diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index bbb44a533c..4b8d0a5d81 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -240,12 +240,20 @@ static int pnv_xive2_vst_read(PnvXive2 *xive, uint32_t type, uint8_t blk, { const XiveVstInfo *info = &vst_infos[type]; uint64_t addr = pnv_xive2_vst_addr(xive, type, 
blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
-    cpu_physical_memory_read(addr, data, info->size);
+    result = address_space_read(&address_space_memory, addr,
+                                MEMTXATTRS_UNSPECIFIED, data,
+                                info->size);
+    if (result != MEMTX_OK) {
+        xive2_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+                    " for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
+    }
 
     return 0;
 }
@@ -256,16 +264,27 @@ static int pnv_xive2_vst_write(PnvXive2 *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
     if (word_number == XIVE_VST_WORD_ALL) {
-        cpu_physical_memory_write(addr, data, info->size);
+        result = address_space_write(&address_space_memory, addr,
+                                     MEMTXATTRS_UNSPECIFIED, data,
+                                     info->size);
     } else {
-        cpu_physical_memory_write(addr + word_number * 4,
-                                  data + word_number * 4, 4);
+        result = address_space_write(&address_space_memory,
+                                     addr + word_number * 4,
+                                     MEMTXATTRS_UNSPECIFIED,
+                                     data + word_number * 4, 4);
+    }
+
+    if (result != MEMTX_OK) {
+        xive2_error(xive, "VST: write failed at @0x%" HWADDR_PRIx
+                    " for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
     }
     return 0;
 }
diff --git a/hw/intc/pnv_xive_regs.h b/hw/intc/pnv_xive_regs.h
index c78f030c02..793847638b 100644
--- a/hw/intc/pnv_xive_regs.h
+++ b/hw/intc/pnv_xive_regs.h
@@ -228,6 +228,7 @@
  * VSD and is only meant to be used in indirect mode !
  */
 #define VSD_MODE                PPC_BITMASK(0, 1)
+#define VSD_MODE_INVALID        0
 #define VSD_MODE_SHARED         1
 #define VSD_MODE_EXCLUSIVE      2
 #define VSD_MODE_FORWARD        3
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 56670b2cac..df3ee0496f 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1518,6 +1518,13 @@ static void xive_router_realize(DeviceState *dev, Error **errp)
     assert(xrtr->xfb);
 }
 
+static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->end_notify(xrtr, eas);
+}
+
 /*
  * Encode the HW CAM line in the block group mode format :
  *
@@ -1664,8 +1671,7 @@ static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
  * another chip. We don't model the PowerBus but the END trigger
  * message has the same parameters than in the function below.
*/ -static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, - uint32_t end_idx, uint32_t end_data) +void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) { XiveEND end; uint8_t priority; @@ -1675,6 +1681,10 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, XiveNVT nvt; bool found; + uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); + uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); + uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w); + /* END cache lookup */ if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, @@ -1817,10 +1827,7 @@ do_escalation: /* * The END trigger becomes an Escalation trigger */ - xive_router_end_notify(xrtr, - xive_get_field32(END_W4_ESC_END_BLOCK, end.w4), - xive_get_field32(END_W4_ESC_END_INDEX, end.w4), - xive_get_field32(END_W5_ESC_END_DATA, end.w5)); + xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4); } void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) @@ -1871,10 +1878,7 @@ void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) /* * The event trigger becomes an END trigger */ - xive_router_end_notify(xrtr, - xive_get_field64(EAS_END_BLOCK, eas.w), - xive_get_field64(EAS_END_INDEX, eas.w), - xive_get_field64(EAS_END_DATA, eas.w)); + xive_router_end_notify_handler(xrtr, &eas); } static Property xive_router_properties[] = { @@ -1887,12 +1891,16 @@ static void xive_router_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass); + XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass); dc->desc = "XIVE Router Engine"; device_class_set_props(dc, xive_router_properties); /* Parent is SysBusDeviceClass. 
No need to call its realize hook */ dc->realize = xive_router_realize; xnc->notify = xive_router_notify; + + /* By default, the router handles END triggers locally */ + xrc->end_notify = xive_router_end_notify; } static const TypeInfo xive_router_info = { diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 67793a86f1..d5b6820d1d 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -712,7 +712,7 @@ static int ppce500_prep_device_tree(PPCE500MachineState *machine, p->kernel_base = kernel_base; p->kernel_size = kernel_size; - qemu_register_reset(ppce500_reset_device_tree, p); + qemu_register_reset_nosnapshotload(ppce500_reset_device_tree, p); p->notifier.notify = ppce500_init_notify; qemu_add_machine_init_done_notifier(&p->notifier); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 510ff0eaaf..9acc7adfc9 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -81,6 +81,7 @@ static void ppc_heathrow_reset(void *opaque) { PowerPCCPU *cpu = opaque; + cpu_ppc_tb_reset(&cpu->env); cpu_reset(CPU(cpu)); } diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 075367d94d..bd397cf2b5 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -99,6 +99,7 @@ static void pegasos2_cpu_reset(void *opaque) cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; cpu->env.nip = 0x100; } + cpu_ppc_tb_reset(&cpu->env); } static void pegasos2_pci_irq(void *opaque, int n, int level) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 9b39d527de..8c7afe037f 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -61,6 +61,8 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) hreg_compute_hflags(env); ppc_maybe_interrupt(env); + cpu_ppc_tb_reset(env); + pcc->intc_reset(pc->chip, cpu); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 0e0a3d93c3..aeb116d919 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -32,6 +32,7 @@ #include "qemu/main-loop.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" +#include "sysemu/replay.h" #include "sysemu/runstate.h" #include "kvm_ppc.h" #include "migration/vmstate.h" @@ -58,7 +59,9 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) if (old_pending != env->pending_interrupts) { ppc_maybe_interrupt(env); - kvmppc_set_interrupt(cpu, irq, level); + if (kvm_enabled()) { + kvmppc_set_interrupt(cpu, irq, level); + } } trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts, @@ -482,10 +485,32 @@ void ppce500_set_mpic_proxy(bool enabled) /*****************************************************************************/ /* PowerPC time base and decrementer emulation */ +/* + * Conversion between QEMU_CLOCK_VIRTUAL ns and timebase (TB) ticks: + * TB ticks are arrived at by multiplying tb_freq then dividing by + * ns per second, and rounding down. TB ticks drive all clocks and + * timers in the target machine. + * + * Converting TB intervals to ns for the purpose of setting a + * QEMU_CLOCK_VIRTUAL timer should go the other way, but rounding + * up. Rounding down could cause the timer to fire before the TB + * value has been reached. 
+ */
+static uint64_t ns_to_tb(uint32_t freq, int64_t clock)
+{
+    return muldiv64(clock, freq, NANOSECONDS_PER_SECOND);
+}
+
+/* virtual clock in TB ticks, not adjusted by TB offset */
+static int64_t tb_to_ns_round_up(uint32_t freq, uint64_t tb)
+{
+    return muldiv64_round_up(tb, NANOSECONDS_PER_SECOND, freq);
+}
+
 uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset)
 {
     /* TB time in tb periods */
-    return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset;
+    return ns_to_tb(tb_env->tb_freq, vmclk) + tb_offset;
 }
 
 uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
@@ -497,7 +522,8 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
         return env->spr[SPR_TBL];
     }
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->tb_offset);
     trace_ppc_tb_load(tb);
 
     return tb;
@@ -508,7 +534,8 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env)
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t tb;
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->tb_offset);
     trace_ppc_tb_load(tb);
 
     return tb >> 32;
@@ -526,8 +553,7 @@ uint32_t cpu_ppc_load_tbu (CPUPPCState *env)
 static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk,
                                     int64_t *tb_offsetp, uint64_t value)
 {
-    *tb_offsetp = value -
-                  muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND);
+    *tb_offsetp = value - ns_to_tb(tb_env->tb_freq, vmclk);
 
     trace_ppc_tb_store(value, *tb_offsetp);
 }
@@ -565,7 +591,8 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env)
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t tb;
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->atb_offset);
     trace_ppc_tb_load(tb);
 
     return tb;
@@ -576,7 +603,8 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env)
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t tb;
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->atb_offset);
     trace_ppc_tb_load(tb);
 
     return tb >> 32;
@@ -683,64 +711,77 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env)
     return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED);
 }
 
-static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
+static inline int64_t __cpu_ppc_load_decr(CPUPPCState *env, int64_t now,
+                                          uint64_t next)
 {
     ppc_tb_t *tb_env = env->tb_env;
-    int64_t decr, diff;
+    uint64_t n;
+    int64_t decr;
 
-    diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    if (diff >= 0) {
-        decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
-    } else if (tb_env->flags & PPC_TIMER_BOOKE) {
+    n = ns_to_tb(tb_env->decr_freq, now);
+    if (next < n && tb_env->flags & PPC_TIMER_BOOKE) {
         decr = 0;
-    } else {
-        decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+    } else {
+        decr = next - n;
     }
+
     trace_ppc_decr_load(decr);
 
     return decr;
 }
 
-target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+static target_ulong _cpu_ppc_load_decr(CPUPPCState *env, int64_t now)
 {
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t decr;
 
-    if (kvm_enabled()) {
-        return env->spr[SPR_DECR];
-    }
-
-    decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
+    decr = __cpu_ppc_load_decr(env, now, tb_env->decr_next);
 
     /*
      * If large decrementer is enabled then the decrementer is sign extended
      * to 64 bits,
otherwise it is a 32 bit value. */ if (env->spr[SPR_LPCR] & LPCR_LD) { - return decr; + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + return sextract64(decr, 0, pcc->lrg_decr_bits); } return (uint32_t) decr; } -target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) +target_ulong cpu_ppc_load_decr(CPUPPCState *env) +{ + if (kvm_enabled()) { + return env->spr[SPR_DECR]; + } else { + return _cpu_ppc_load_decr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + } +} + +static target_ulong _cpu_ppc_load_hdecr(CPUPPCState *env, int64_t now) { PowerPCCPU *cpu = env_archcpu(env); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); ppc_tb_t *tb_env = env->tb_env; uint64_t hdecr; - hdecr = _cpu_ppc_load_decr(env, tb_env->hdecr_next); + hdecr = __cpu_ppc_load_decr(env, now, tb_env->hdecr_next); /* * If we have a large decrementer (POWER9 or later) then hdecr is sign * extended to 64 bits, otherwise it is 32 bits. */ if (pcc->lrg_decr_bits > 32) { - return hdecr; + return sextract64(hdecr, 0, pcc->lrg_decr_bits); } return (uint32_t) hdecr; } +target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) +{ + return _cpu_ppc_load_hdecr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); +} + uint64_t cpu_ppc_load_purr (CPUPPCState *env) { ppc_tb_t *tb_env = env->tb_env; @@ -785,7 +826,7 @@ static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu) ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); } -static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, +static void __cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now, uint64_t *nextp, QEMUTimer *timer, void (*raise_excp)(void *), void (*lower_excp)(PowerPCCPU *), @@ -794,7 +835,7 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, { CPUPPCState *env = &cpu->env; ppc_tb_t *tb_env = env->tb_env; - uint64_t now, next; + uint64_t next; int64_t signed_value; int64_t signed_decr; @@ -806,10 +847,14 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, trace_ppc_decr_store(nr_bits, decr, value); - if (kvm_enabled()) { - /* KVM handles decrementer exceptions, we don't need our own timer */ - return; - } + /* + * Calculate the next decrementer event and set a timer. + * decr_next is in timebase units to keep rounding simple. Note it is + * not adjusted by tb_offset because if TB changes via tb_offset changing, + * decrementer does not change, so not directly comparable with TB. + */ + next = ns_to_tb(tb_env->decr_freq, now) + value; + *nextp = next; /* nextp is in timebase units */ /* * Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt. 
@@ -832,21 +877,17 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, (*lower_excp)(cpu); } - /* Calculate the next timer event */ - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq); - *nextp = next; - /* Adjust timer */ - timer_mod(timer, next); + timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next)); } -static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr, - target_ulong value, int nr_bits) +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now, + target_ulong decr, target_ulong value, + int nr_bits) { ppc_tb_t *tb_env = cpu->env.tb_env; - __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer, + __cpu_ppc_store_decr(cpu, now, &tb_env->decr_next, tb_env->decr_timer, tb_env->decr_timer->cb, &cpu_ppc_decr_lower, tb_env->flags, decr, value, nr_bits); } @@ -855,13 +896,22 @@ void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value) { PowerPCCPU *cpu = env_archcpu(env); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + int64_t now; + target_ulong decr; int nr_bits = 32; + if (kvm_enabled()) { + /* KVM handles decrementer exceptions, we don't need our own timer */ + return; + } + if (env->spr[SPR_LPCR] & LPCR_LD) { nr_bits = pcc->lrg_decr_bits; } - _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits); + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + decr = _cpu_ppc_load_decr(env, now); + _cpu_ppc_store_decr(cpu, now, decr, value, nr_bits); } static void cpu_ppc_decr_cb(void *opaque) @@ -871,14 +921,15 @@ static void cpu_ppc_decr_cb(void *opaque) cpu_ppc_decr_excp(cpu); } -static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr, - target_ulong value, int nr_bits) +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, int64_t now, + target_ulong hdecr, target_ulong value, + int nr_bits) { ppc_tb_t *tb_env = cpu->env.tb_env; if (tb_env->hdecr_timer != NULL) { /* HDECR (Book3S 64bit) is edge-based, not level like DECR */ - __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer, + __cpu_ppc_store_decr(cpu, now, &tb_env->hdecr_next, tb_env->hdecr_timer, tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower, PPC_DECR_UNDERFLOW_TRIGGERED, hdecr, value, nr_bits); @@ -889,9 +940,12 @@ void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value) { PowerPCCPU *cpu = env_archcpu(env); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + int64_t now; + target_ulong hdecr; - _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, - pcc->lrg_decr_bits); + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + hdecr = _cpu_ppc_load_hdecr(env, now); + _cpu_ppc_store_hdecr(cpu, now, hdecr, value, pcc->lrg_decr_bits); } static void cpu_ppc_hdecr_cb(void *opaque) @@ -901,29 +955,16 @@ static void cpu_ppc_hdecr_cb(void *opaque) cpu_ppc_hdecr_excp(cpu); } -void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) +static void _cpu_ppc_store_purr(CPUPPCState *env, int64_t now, uint64_t value) { ppc_tb_t *tb_env = env->tb_env; - cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->purr_offset, value); + cpu_ppc_store_tb(tb_env, now, &tb_env->purr_offset, value); } -static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq) +void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) { - CPUPPCState *env = opaque; - PowerPCCPU *cpu = env_archcpu(env); - ppc_tb_t *tb_env = env->tb_env; - - tb_env->tb_freq = freq; - tb_env->decr_freq = freq; - /* There is a bug in Linux 2.4 kernels: - * if a decrementer exception is 
pending when it enables msr_ee at startup, - * it's not ready to handle it... - */ - _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); - _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); - cpu_ppc_store_purr(env, 0x0000000000000000ULL); + _cpu_ppc_store_purr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), value); } static void timebase_save(PPCTimebase *tb) @@ -936,8 +977,14 @@ static void timebase_save(PPCTimebase *tb) return; } - /* not used anymore, we keep it for compatibility */ - tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); + if (replay_mode == REPLAY_MODE_NONE) { + /* not used anymore, we keep it for compatibility */ + tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); + } else { + /* simpler for record-replay to avoid this event, compat not needed */ + tb->time_of_the_day_ns = 0; + } + /* * tb_offset is only expected to be changed by QEMU so * there is no need to update it from KVM here @@ -1027,7 +1074,7 @@ const VMStateDescription vmstate_ppc_timebase = { }; /* Set up (once) timebase frequency (in Hz) */ -clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) +void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq) { PowerPCCPU *cpu = env_archcpu(env); ppc_tb_t *tb_env; @@ -1040,16 +1087,41 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) tb_env->flags |= PPC_DECR_UNDERFLOW_LEVEL; } /* Create new timer */ - tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu); + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &cpu_ppc_decr_cb, cpu); if (env->has_hv_mode && !cpu->vhyp) { - tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb, - cpu); + tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &cpu_ppc_hdecr_cb, cpu); } else { tb_env->hdecr_timer = NULL; } - cpu_ppc_set_tb_clk(env, freq); - return &cpu_ppc_set_tb_clk; + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; +} + +void cpu_ppc_tb_reset(CPUPPCState *env) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + + timer_del(tb_env->decr_timer); + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0); + tb_env->decr_next = 0; + if (tb_env->hdecr_timer != NULL) { + timer_del(tb_env->hdecr_timer); + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); + tb_env->hdecr_next = 0; + } + + /* + * There is a bug in Linux 2.4 kernels: + * if a decrementer exception is pending when it enables msr_ee at startup, + * it's not ready to handle it... 
+ */ + cpu_ppc_store_decr(env, -1); + cpu_ppc_store_hdecr(env, -1); + cpu_ppc_store_purr(env, 0x0000000000000000ULL); } void cpu_ppc_tb_free(CPUPPCState *env) @@ -1125,9 +1197,7 @@ static void cpu_4xx_fit_cb (void *opaque) /* Cannot occur, but makes gcc happy */ return; } - next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq); - if (next == now) - next++; + next = now + tb_to_ns_round_up(tb_env->tb_freq, next); timer_mod(ppc40x_timer->fit_timer, next); env->spr[SPR_40x_TSR] |= 1 << 26; if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { @@ -1153,14 +1223,15 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) } else { trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - next = now + muldiv64(ppc40x_timer->pit_reload, - NANOSECONDS_PER_SECOND, tb_env->decr_freq); - if (is_excp) - next += tb_env->decr_next - now; - if (next == now) - next++; + + if (is_excp) { + tb_env->decr_next += ppc40x_timer->pit_reload; + } else { + tb_env->decr_next = ns_to_tb(tb_env->decr_freq, now) + + ppc40x_timer->pit_reload; + } + next = tb_to_ns_round_up(tb_env->decr_freq, tb_env->decr_next); timer_mod(tb_env->decr_timer, next); - tb_env->decr_next = next; } } @@ -1213,9 +1284,7 @@ static void cpu_4xx_wdt_cb (void *opaque) /* Cannot occur, but makes gcc happy */ return; } - next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); - if (next == now) - next++; + next = now + tb_to_ns_round_up(tb_env->decr_freq, next); trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { case 0x0: @@ -1465,5 +1534,7 @@ void ppc_irq_reset(PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; env->irq_input_state = 0; - kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); + if (kvm_enabled()) { + kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); + } } diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index d9231c7317..f6fd35fcb9 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -67,6 +67,7 @@ static void ppc_prep_reset(void *opaque) PowerPCCPU *cpu = opaque; cpu_reset(CPU(cpu)); + cpu_ppc_tb_reset(&cpu->env); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 07e91e3800..f7cc6a890f 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1022,7 +1022,6 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) { MachineState *machine = MACHINE(spapr); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); - uint8_t rng_seed[32]; int chosen; _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); @@ -1100,8 +1099,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) spapr_dt_ov5_platform_support(spapr, fdt, chosen); } - qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); - _FDT(fdt_setprop(fdt, chosen, "rng-seed", rng_seed, sizeof(rng_seed))); + _FDT(fdt_setprop(fdt, chosen, "rng-seed", spapr->fdt_rng_seed, 32)); _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5")); } @@ -1322,6 +1320,22 @@ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask) } } +/* May be used when the machine is not running */ +void spapr_init_all_lpcrs(target_ulong value, target_ulong mask) +{ + CPUState *cs; + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + target_ulong lpcr; + + lpcr = env->spr[SPR_LPCR]; + lpcr &= ~(LPCR_HR | LPCR_UPRT); + ppc_store_lpcr(cpu, lpcr); + } +} + + static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) { @@ -1583,7 +1597,7 @@ 
int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp) } /* We're setting up a hash table, so that means we're not radix */ spapr->patb_entry = 0; - spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT); + spapr_init_all_lpcrs(0, LPCR_HR | LPCR_UPRT); return 0; } @@ -1638,6 +1652,14 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) void *fdt; int rc; + if (reason != SHUTDOWN_CAUSE_SNAPSHOT_LOAD) { + /* + * Record-replay snapshot load must not consume random, this was + * already replayed from initial machine reset. + */ + qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); + } + pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -1661,7 +1683,7 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); - ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); + ppc_init_compat_all(spapr->max_compat_pvr, &error_fatal); /* * This is fixing some of the default configuration of the XIVE diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index b482d9754a..91fae56573 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -74,6 +74,8 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) kvm_check_mmu(cpu, &error_fatal); + cpu_ppc_tb_reset(env); + spapr_irq_cpu_intc_reset(spapr, cpu); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 9b1f225d4a..b7dc388f2f 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -3,6 +3,7 @@ #include "qapi/error.h" #include "sysemu/hw_accel.h" #include "sysemu/runstate.h" +#include "sysemu/tcg.h" #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" @@ -789,6 +790,54 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, SpaprMachineState *spapr, return H_SUCCESS; } +static target_ulong h_set_mode_resource_set_ciabr(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + CPUPPCState *env = &cpu->env; + + assert(tcg_enabled()); /* KVM will have handled this */ + + if (mflags) { + return H_UNSUPPORTED_FLAG; + } + if (value2) { + return H_P4; + } + if ((value1 & PPC_BITMASK(62, 63)) == 0x3) { + return H_P3; + } + + ppc_store_ciabr(env, value1); + + return H_SUCCESS; +} + +static target_ulong h_set_mode_resource_set_dawr0(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + CPUPPCState *env = &cpu->env; + + assert(tcg_enabled()); /* KVM will have handled this */ + + if (mflags) { + return H_UNSUPPORTED_FLAG; + } + if (value2 & PPC_BIT(61)) { + return H_P4; + } + + ppc_store_dawr0(env, value1); + ppc_store_dawrx0(env, value2); + + return H_SUCCESS; +} + static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong mflags, @@ -858,6 +907,14 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong ret = H_P2; switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + ret = h_set_mode_resource_set_ciabr(cpu, spapr, args[0], args[2], + args[3]); + break; + case H_SET_MODE_RESOURCE_SET_DAWR0: + ret = h_set_mode_resource_set_dawr0(cpu, spapr, args[0], args[2], + args[3]); + break; case H_SET_MODE_RESOURCE_LE: ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]); break; diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c index 18c3f92317..e3b430a81f 100644 --- a/hw/ppc/vof.c +++ b/hw/ppc/vof.c @@ -1024,6 +1024,8 @@ void vof_cleanup(Vof *vof) } vof->claimed = 
NULL; vof->of_instances = NULL; + vof->of_instance_last = 0; + vof->claimed_base = 0; } void vof_build_dt(void *fdt, Vof *vof) diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index e095c002dc..17a8dfc107 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -54,7 +54,8 @@ struct ppc_tb_t { */ uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset); -clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq); +void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq); +void cpu_ppc_tb_reset(CPUPPCState *env); void cpu_ppc_tb_free(CPUPPCState *env); void cpu_ppc_hdecr_init(CPUPPCState *env); void cpu_ppc_hdecr_exit(CPUPPCState *env); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 538b2dfb89..f4bd204d86 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -204,6 +204,7 @@ struct SpaprMachineState { uint32_t fdt_size; uint32_t fdt_initial_size; void *fdt_blob; + uint8_t fdt_rng_seed[32]; long kernel_size; bool kernel_le; uint64_t kernel_addr; @@ -1012,6 +1013,7 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); +void spapr_init_all_lpcrs(target_ulong value, target_ulong mask); hwaddr spapr_get_rtas_addr(void); bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 9f580a2699..f120874e0f 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -401,6 +401,7 @@ struct XiveRouterClass { int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt, uint8_t word_number); uint8_t (*get_block_id)(XiveRouter *xrtr); + void (*end_notify)(XiveRouter *xrtr, XiveEAS *eas); }; int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, @@ -414,6 +415,7 @@ int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt, uint8_t word_number); void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked); +void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas); /* * XIVE Presenter diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 011618373e..ead97d354d 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -56,6 +56,11 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) return (__int128_t)a * b / c; } +static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c) +{ + return ((__int128_t)a * b + c - 1) / c; +} + static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor) { @@ -83,7 +88,8 @@ void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor); -static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c, + bool round_up) { union { uint64_t ll; @@ -99,12 +105,25 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) u.ll = a; rl = (uint64_t)u.l.low * (uint64_t)b; + if (round_up) { + rl += c - 1; + } rh = (uint64_t)u.l.high * (uint64_t)b; rh += (rl >> 32); res.l.high = rh / c; res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; return res.ll; } + +static inline uint64_t muldiv64(uint64_t a, 
uint32_t b, uint32_t c) +{ + return muldiv64_rounding(a, b, c, false); +} + +static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c) +{ + return muldiv64_rounding(a, b, c, true); +} #endif /** diff --git a/target/ppc/compat.c b/target/ppc/compat.c index 7949a24f5a..ebef2cccec 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -229,6 +229,25 @@ int ppc_set_compat_all(uint32_t compat_pvr, Error **errp) return 0; } +/* To be used when the machine is not running */ +int ppc_init_compat_all(uint32_t compat_pvr, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + int ret; + + ret = ppc_set_compat(cpu, compat_pvr, errp); + + if (ret < 0) { + return ret; + } + } + + return 0; +} + int ppc_compat_max_vthreads(PowerPCCPU *cpu) { const CompatInfo *compat = compat_by_pvr(cpu->compat_pvr); diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c index 424f2e1741..e3ad8e0c27 100644 --- a/target/ppc/cpu.c +++ b/target/ppc/cpu.c @@ -59,6 +59,7 @@ void ppc_store_vscr(CPUPPCState *env, uint32_t vscr) env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT); env->vscr_sat.u64[1] = 0; set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); + set_flush_inputs_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); } uint32_t ppc_get_vscr(CPUPPCState *env) @@ -102,6 +103,92 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) ppc_maybe_interrupt(env); } + +#if defined(TARGET_PPC64) +void ppc_update_ciabr(CPUPPCState *env) +{ + CPUState *cs = env_cpu(env); + target_ulong ciabr = env->spr[SPR_CIABR]; + target_ulong ciea, priv; + + ciea = ciabr & PPC_BITMASK(0, 61); + priv = ciabr & PPC_BITMASK(62, 63); + + if (env->ciabr_breakpoint) { + cpu_breakpoint_remove_by_ref(cs, env->ciabr_breakpoint); + env->ciabr_breakpoint = NULL; + } + + if (priv) { + cpu_breakpoint_insert(cs, ciea, BP_CPU, &env->ciabr_breakpoint); + } +} + +void ppc_store_ciabr(CPUPPCState *env, target_ulong val) +{ + env->spr[SPR_CIABR] = val; + ppc_update_ciabr(env); +} + +void ppc_update_daw0(CPUPPCState *env) +{ + CPUState *cs = env_cpu(env); + target_ulong deaw = env->spr[SPR_DAWR0] & PPC_BITMASK(0, 60); + uint32_t dawrx = env->spr[SPR_DAWRX0]; + int mrd = extract32(dawrx, PPC_BIT_NR(48), 54 - 48); + bool dw = extract32(dawrx, PPC_BIT_NR(57), 1); + bool dr = extract32(dawrx, PPC_BIT_NR(58), 1); + bool hv = extract32(dawrx, PPC_BIT_NR(61), 1); + bool sv = extract32(dawrx, PPC_BIT_NR(62), 1); + bool pr = extract32(dawrx, PPC_BIT_NR(62), 1); + vaddr len; + int flags; + + if (env->dawr0_watchpoint) { + cpu_watchpoint_remove_by_ref(cs, env->dawr0_watchpoint); + env->dawr0_watchpoint = NULL; + } + + if (!dr && !dw) { + return; + } + + if (!hv && !sv && !pr) { + return; + } + + len = (mrd + 1) * 8; + flags = BP_CPU | BP_STOP_BEFORE_ACCESS; + if (dr) { + flags |= BP_MEM_READ; + } + if (dw) { + flags |= BP_MEM_WRITE; + } + + cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr0_watchpoint); +} + +void ppc_store_dawr0(CPUPPCState *env, target_ulong val) +{ + env->spr[SPR_DAWR0] = val; + ppc_update_daw0(env); +} + +void ppc_store_dawrx0(CPUPPCState *env, uint32_t val) +{ + int hrammc = extract32(val, PPC_BIT_NR(56), 1); + + if (hrammc) { + /* This might be done with a second watchpoint at the xor of DEAW[0] */ + qemu_log_mask(LOG_UNIMP, "%s: DAWRX0[HRAMMC] is unimplemented\n", + __func__); + } + + env->spr[SPR_DAWRX0] = val; + ppc_update_daw0(env); +} +#endif #endif static inline void fpscr_set_rounding_mode(CPUPPCState *env) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 
25fac9577a..173e4c351a 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1121,7 +1121,9 @@ struct CPUArchState { target_ulong reserve_addr; /* Reservation address */ target_ulong reserve_length; /* Reservation larx op size (bytes) */ target_ulong reserve_val; /* Reservation value */ +#if defined(TARGET_PPC64) target_ulong reserve_val2; +#endif /* These are used in supervisor mode only */ target_ulong msr; /* machine state register */ @@ -1137,6 +1139,8 @@ struct CPUArchState { /* MMU context, only relevant for full system emulation */ #if defined(TARGET_PPC64) ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */ + struct CPUBreakpoint *ciabr_breakpoint; + struct CPUWatchpoint *dawr0_watchpoint; #endif target_ulong sr[32]; /* segment registers */ uint32_t nb_BATs; /* number of BATs */ @@ -1403,6 +1407,11 @@ void ppc_translate_init(void); #if !defined(CONFIG_USER_ONLY) void ppc_store_sdr1(CPUPPCState *env, target_ulong value); void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val); +void ppc_update_ciabr(CPUPPCState *env); +void ppc_store_ciabr(CPUPPCState *env, target_ulong value); +void ppc_update_daw0(CPUPPCState *env); +void ppc_store_dawr0(CPUPPCState *env, target_ulong value); +void ppc_store_dawrx0(CPUPPCState *env, uint32_t value); #endif /* !defined(CONFIG_USER_ONLY) */ void ppc_store_msr(CPUPPCState *env, target_ulong value); @@ -1495,6 +1504,7 @@ int ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp); #if !defined(CONFIG_USER_ONLY) int ppc_set_compat_all(uint32_t compat_pvr, Error **errp); +int ppc_init_compat_all(uint32_t compat_pvr, Error **errp); #endif int ppc_compat_max_vthreads(PowerPCCPU *cpu); void ppc_compat_add_property(Object *obj, const char *name, @@ -1897,7 +1907,9 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_PSSCR (0x357) #define SPR_440_INV0 (0x370) #define SPR_440_INV1 (0x371) +#define SPR_TRIG1 (0x371) #define SPR_440_INV2 (0x372) +#define SPR_TRIG2 (0x372) #define SPR_440_INV3 (0x373) #define SPR_440_ITV0 (0x374) #define SPR_440_ITV1 (0x375) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 02b7aad9b0..7ab5ee92d9 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5117,17 +5117,17 @@ static void register_book3s_207_dbg_sprs(CPUPPCState *env) spr_register_kvm_hv(env, SPR_DAWR0, "DAWR0", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_dawr0, KVM_REG_PPC_DAWR, 0x00000000); spr_register_kvm_hv(env, SPR_DAWRX0, "DAWRX0", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic32, + &spr_read_generic, &spr_write_dawrx0, KVM_REG_PPC_DAWRX, 0x00000000); spr_register_kvm_hv(env, SPR_CIABR, "CIABR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_ciabr, KVM_REG_PPC_CIABR, 0x00000000); } @@ -5660,6 +5660,16 @@ static void register_power_common_book4_sprs(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_tfmr, &spr_write_tfmr, 0x00000000); + spr_register_hv(env, SPR_TRIG1, "TRIG1", + SPR_NOACCESS, SPR_NOACCESS, + &spr_access_nop, &spr_write_generic, + &spr_access_nop, &spr_write_generic, + 0x00000000); + spr_register_hv(env, SPR_TRIG2, "TRIG2", + SPR_NOACCESS, SPR_NOACCESS, + &spr_access_nop, &spr_write_generic, + &spr_access_nop, &spr_write_generic, + 0x00000000); #endif } @@ -7149,6 +7159,8 @@ static void ppc_cpu_reset_hold(Object *obj) env->nip = env->hreset_vector | 
env->excp_prefix; if (tcg_enabled()) { + cpu_breakpoint_remove_all(s, BP_CPU); + cpu_watchpoint_remove_all(s, BP_CPU); if (env->mmu_model != POWERPC_MMU_REAL) { ppc_tlb_invalidate_all(env); } @@ -7336,6 +7348,9 @@ static const struct TCGCPUOps ppc_tcg_ops = { .cpu_exec_exit = ppc_cpu_exec_exit, .do_unaligned_access = ppc_cpu_do_unaligned_access, .do_transaction_failed = ppc_cpu_do_transaction_failed, + .debug_excp_handler = ppc_cpu_debug_excp_handler, + .debug_check_breakpoint = ppc_cpu_debug_check_breakpoint, + .debug_check_watchpoint = ppc_cpu_debug_check_watchpoint, #endif /* !CONFIG_USER_ONLY */ }; #endif /* CONFIG_TCG */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 9aa8e46566..72ec2be92e 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -843,6 +843,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); vhc->hypercall(cpu->vhyp, cpu); + powerpc_reset_excp_state(cpu); return; } @@ -1014,6 +1015,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); vhc->hypercall(cpu->vhyp, cpu); + powerpc_reset_excp_state(cpu); return; } @@ -1526,6 +1528,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); vhc->hypercall(cpu->vhyp, cpu); + powerpc_reset_excp_state(cpu); return; } if (env->insns_flags2 & PPC2_ISA310) { @@ -1571,9 +1574,11 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) } } break; + case POWERPC_EXCP_TRACE: /* Trace exception */ + msr |= env->error_code; + /* fall through */ case POWERPC_EXCP_DSEG: /* Data segment exception */ case POWERPC_EXCP_ISEG: /* Instruction segment exception */ - case POWERPC_EXCP_TRACE: /* Trace exception */ case POWERPC_EXCP_SDOOR: /* Doorbell interrupt */ case POWERPC_EXCP_PERFM: /* Performance monitor interrupt */ break; @@ -3168,6 +3173,18 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) } #endif /* TARGET_PPC64 */ +/* Single-step tracing */ +void helper_book3s_trace(CPUPPCState *env, target_ulong prev_ip) +{ + uint32_t error_code = 0; + if (env->insns_flags2 & PPC2_ISA207S) { + /* Load/store reporting, SRR1[35, 36] and SDAR, are not implemented. 
*/ + env->spr[SPR_POWER_SIAR] = prev_ip; + error_code = PPC_BIT(33); + } + raise_exception_err(env, POWERPC_EXCP_TRACE, error_code); +} + void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) @@ -3243,5 +3260,97 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, cs->exception_index = POWERPC_EXCP_MCHECK; cpu_loop_exit_restore(cs, retaddr); } + +void ppc_cpu_debug_excp_handler(CPUState *cs) +{ +#if defined(TARGET_PPC64) + CPUPPCState *env = cs->env_ptr; + + if (env->insns_flags2 & PPC2_ISA207S) { + if (cs->watchpoint_hit) { + if (cs->watchpoint_hit->flags & BP_CPU) { + env->spr[SPR_DAR] = cs->watchpoint_hit->hitaddr; + env->spr[SPR_DSISR] = PPC_BIT(41); + cs->watchpoint_hit = NULL; + raise_exception(env, POWERPC_EXCP_DSI); + } + cs->watchpoint_hit = NULL; + } else if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) { + raise_exception_err(env, POWERPC_EXCP_TRACE, + PPC_BIT(33) | PPC_BIT(43)); + } + } +#endif +} + +bool ppc_cpu_debug_check_breakpoint(CPUState *cs) +{ +#if defined(TARGET_PPC64) + CPUPPCState *env = cs->env_ptr; + + if (env->insns_flags2 & PPC2_ISA207S) { + target_ulong priv; + + priv = env->spr[SPR_CIABR] & PPC_BITMASK(62, 63); + switch (priv) { + case 0x1: /* problem */ + return env->msr & ((target_ulong)1 << MSR_PR); + case 0x2: /* supervisor */ + return (!(env->msr & ((target_ulong)1 << MSR_PR)) && + !(env->msr & ((target_ulong)1 << MSR_HV))); + case 0x3: /* hypervisor */ + return (!(env->msr & ((target_ulong)1 << MSR_PR)) && + (env->msr & ((target_ulong)1 << MSR_HV))); + default: + g_assert_not_reached(); + } + } +#endif + + return false; +} + +bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) +{ +#if defined(TARGET_PPC64) + CPUPPCState *env = cs->env_ptr; + + if (env->insns_flags2 & PPC2_ISA207S) { + if (wp == env->dawr0_watchpoint) { + uint32_t dawrx = env->spr[SPR_DAWRX0]; + bool wt = extract32(dawrx, PPC_BIT_NR(59), 1); + bool wti = extract32(dawrx, PPC_BIT_NR(60), 1); + bool hv = extract32(dawrx, PPC_BIT_NR(61), 1); + bool sv = extract32(dawrx, PPC_BIT_NR(62), 1); + bool pr = extract32(dawrx, PPC_BIT_NR(62), 1); + + if ((env->msr & ((target_ulong)1 << MSR_PR)) && !pr) { + return false; + } else if ((env->msr & ((target_ulong)1 << MSR_HV)) && !hv) { + return false; + } else if (!sv) { + return false; + } + + if (!wti) { + if (env->msr & ((target_ulong)1 << MSR_DR)) { + if (!wt) { + return false; + } + } else { + if (wt) { + return false; + } + } + } + + return true; + } + } +#endif + + return false; +} + #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ diff --git a/target/ppc/helper.h b/target/ppc/helper.h index abec6fe341..86f97ee1e7 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -25,6 +25,9 @@ DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(rfebb, void, env, tl) DEF_HELPER_2(store_lpcr, void, env, tl) DEF_HELPER_2(store_pcr, void, env, tl) +DEF_HELPER_2(store_ciabr, void, env, tl) +DEF_HELPER_2(store_dawr0, void, env, tl) +DEF_HELPER_2(store_dawrx0, void, env, tl) DEF_HELPER_2(store_mmcr0, void, env, tl) DEF_HELPER_2(store_mmcr1, void, env, tl) DEF_HELPER_3(store_pmc, void, env, i32, i64) @@ -32,6 +35,7 @@ DEF_HELPER_2(read_pmc, tl, env, i32) DEF_HELPER_2(insns_inc, void, env, i32) DEF_HELPER_1(handle_pmc5_overflow, void, env) #endif +DEF_HELPER_2(book3s_trace, void, env, tl) DEF_HELPER_1(check_tlb_flush_local, void, env) DEF_HELPER_1(check_tlb_flush_global, void, env) #endif diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 
57acb3212c..15803bc313 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -301,6 +301,9 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr); +void ppc_cpu_debug_excp_handler(CPUState *cs); +bool ppc_cpu_debug_check_breakpoint(CPUState *cs); +bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); #endif FIELD(GER_MSK, XMSK, 0, 4) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 7698501743..51112bd367 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1320,7 +1320,7 @@ int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) return 0; } - if (!kvm_enabled() || !cap_interrupt_unset) { + if (!cap_interrupt_unset) { return 0; } diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 1270a1f7fc..68cbdffecd 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -9,6 +9,7 @@ #include "qapi/error.h" #include "kvm_ppc.h" #include "power8-pmu.h" +#include "sysemu/replay.h" static void post_load_update_msr(CPUPPCState *env) { @@ -208,6 +209,14 @@ static int cpu_pre_save(void *opaque) /* Used to retain migration compatibility for pre 6.0 for 601 machines. */ env->hflags_compat_nmsr = 0; + if (tcg_enabled()) { + /* + * TCG does not maintain the DECR spr (unlike KVM) so have to save + * it here. + */ + env->spr[SPR_DECR] = cpu_ppc_load_decr(env); + } + return 0; } @@ -313,6 +322,17 @@ static int cpu_post_load(void *opaque, int version_id) post_load_update_msr(env); if (tcg_enabled()) { + /* Re-set breaks based on regs */ +#if defined(TARGET_PPC64) + ppc_update_ciabr(env); + ppc_update_daw0(env); +#endif + /* + * TCG needs to re-start the decrementer timer and/or raise the + * interrupt. This works for level-triggered decrementer. Edge + * triggered types (including HDEC) would need to carry more state. 
+ */ + cpu_ppc_store_decr(env, env->spr[SPR_DECR]); pmu_mmcr01_updated(env); } @@ -670,6 +690,27 @@ static const VMStateDescription vmstate_compat = { } }; +static bool reservation_needed(void *opaque) +{ + return (replay_mode != REPLAY_MODE_NONE); +} + +static const VMStateDescription vmstate_reservation = { + .name = "cpu/reservation", + .version_id = 1, + .minimum_version_id = 1, + .needed = reservation_needed, + .fields = (VMStateField[]) { + VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU), + VMSTATE_UINTTL(env.reserve_length, PowerPCCPU), + VMSTATE_UINTTL(env.reserve_val, PowerPCCPU), +#if defined(TARGET_PPC64) + VMSTATE_UINTTL(env.reserve_val2, PowerPCCPU), +#endif + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ppc_cpu = { .name = "cpu", .version_id = 5, @@ -691,8 +732,7 @@ const VMStateDescription vmstate_ppc_cpu = { VMSTATE_UINTTL_ARRAY(env.spr, PowerPCCPU, 1024), VMSTATE_UINT64(env.spe_acc, PowerPCCPU), - /* Reservation */ - VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU), + VMSTATE_UNUSED(sizeof(target_ulong)), /* was env.reserve_addr */ /* Supervisor mode architected state */ VMSTATE_UINTTL(env.msr, PowerPCCPU), @@ -721,6 +761,7 @@ const VMStateDescription vmstate_ppc_cpu = { &vmstate_tlbemb, &vmstate_tlbmas, &vmstate_compat, + &vmstate_reservation, NULL } }; diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 692d058665..a05bdf78c9 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -199,6 +199,21 @@ void helper_store_pcr(CPUPPCState *env, target_ulong value) env->spr[SPR_PCR] = value & pcc->pcr_mask; } +void helper_store_ciabr(CPUPPCState *env, target_ulong value) +{ + ppc_store_ciabr(env, value); +} + +void helper_store_dawr0(CPUPPCState *env, target_ulong value) +{ + ppc_store_dawr0(env, value); +} + +void helper_store_dawrx0(CPUPPCState *env, target_ulong value) +{ + ppc_store_dawrx0(env, value); +} + /* * DPDES register is shared. Each bit reflects the state of the * doorbell interrupt of a thread of the same core. diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 920084bd8f..5823e039e6 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -219,27 +219,25 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, return false; } -static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type, - uint64_t pte, hwaddr pte_addr, int *prot) +static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte) { - CPUState *cs = CPU(cpu); - uint64_t npte; + switch (access_type) { + case MMU_DATA_STORE: + if (!(pte & R_PTE_C)) { + break; + } + /* fall through */ + case MMU_INST_FETCH: + case MMU_DATA_LOAD: + if (!(pte & R_PTE_R)) { + break; + } - npte = pte | R_PTE_R; /* Always set reference bit */ - - if (access_type == MMU_DATA_STORE) { /* Store/Write */ - npte |= R_PTE_C; /* Set change bit */ - } else { - /* - * Treat the page as read-only for now, so that a later write - * will pass through this function again to set the C bit. 
- */ - *prot &= ~PAGE_WRITE; + /* R/C bits are already set appropriately for this access */ + return 0; } - if (pte ^ npte) { /* If pte has changed then write it back */ - stq_phys(cs->as, pte_addr, npte); - } + return 1; } static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls) @@ -380,7 +378,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, ppc_v3_pate_t pate, hwaddr *h_raddr, int *h_prot, int *h_page_size, bool pde_addr, - int mmu_idx, bool guest_visible) + int mmu_idx, uint64_t lpid, + bool guest_visible) { MMUAccessType access_type = orig_access_type; int fault_cause = 0; @@ -418,7 +417,24 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, } if (guest_visible) { - ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot); + if (ppc_radix64_check_rc(access_type, pte)) { + /* + * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during + * partition-scoped translation when effLPID = 0 results in normal + * (non-Hypervisor) Data and Instruction Storage Interrupts + * respectively. + * + * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware + * seem to exhibit the same behavior. + */ + if (lpid > 0) { + ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, + DSISR_ATOMIC_RC); + } else { + ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC); + } + return 1; + } } return 0; @@ -447,7 +463,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, vaddr eaddr, uint64_t pid, ppc_v3_pate_t pate, hwaddr *g_raddr, int *g_prot, int *g_page_size, - int mmu_idx, bool guest_visible) + int mmu_idx, uint64_t lpid, + bool guest_visible) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -497,7 +514,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, prtbe_addr, pate, &h_raddr, &h_prot, &h_page_size, true, - 5, guest_visible); + 5, lpid, guest_visible); if (ret) { return ret; } @@ -539,7 +556,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, pte_addr, pate, &h_raddr, &h_prot, &h_page_size, - true, 5, guest_visible); + true, 5, lpid, + guest_visible); if (ret) { return ret; } @@ -580,7 +598,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, } if (guest_visible) { - ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot); + /* R/C bits not appropriately set for access */ + if (ppc_radix64_check_rc(access_type, pte)) { + ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC); + return 1; + } } return 0; @@ -695,7 +717,8 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, if (relocation) { int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, pate, &g_raddr, &prot, - &psize, mmu_idx, guest_visible); + &psize, mmu_idx, lpid, + guest_visible); if (ret) { return false; } @@ -719,7 +742,8 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, g_raddr, pate, raddr, &prot, &psize, false, - mmu_idx, guest_visible); + mmu_idx, lpid, + guest_visible); if (ret) { return false; } diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h index 5995070eaf..8a9d6cd994 100644 --- a/target/ppc/spr_common.h +++ b/target/ppc/spr_common.h @@ -159,6 +159,9 @@ void spr_read_mas73(DisasContext *ctx, int gprn, int sprn); #ifdef TARGET_PPC64 void spr_read_cfar(DisasContext *ctx, int gprn, int sprn); void 
spr_write_cfar(DisasContext *ctx, int sprn, int gprn); +void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn); +void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn); +void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn); void spr_write_ureg(DisasContext *ctx, int sprn, int gprn); void spr_read_purr(DisasContext *ctx, int gprn, int sprn); void spr_write_purr(DisasContext *ctx, int sprn, int gprn); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 7111b34030..6b242ae0a6 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -75,7 +75,9 @@ static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; static TCGv cpu_reserve_length; static TCGv cpu_reserve_val; +#if defined(TARGET_PPC64) static TCGv cpu_reserve_val2; +#endif static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -149,9 +151,11 @@ void ppc_translate_init(void) cpu_reserve_val = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_val), "reserve_val"); +#if defined(TARGET_PPC64) cpu_reserve_val2 = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_val2), "reserve_val2"); +#endif cpu_fpscr = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, fpscr), "fpscr"); @@ -336,8 +340,9 @@ static void gen_ppc_maybe_interrupt(DisasContext *ctx) * The exception can be either POWERPC_EXCP_TRACE (on most PowerPCs) or * POWERPC_EXCP_DEBUG (on BookE). */ -static uint32_t gen_prep_dbgex(DisasContext *ctx) +static void gen_debug_exception(DisasContext *ctx, bool rfi_type) { +#if !defined(CONFIG_USER_ONLY) if (ctx->flags & POWERPC_FLAG_DE) { target_ulong dbsr = 0; if (ctx->singlestep_enabled & CPU_SINGLE_STEP) { @@ -350,16 +355,18 @@ static uint32_t gen_prep_dbgex(DisasContext *ctx) gen_load_spr(t0, SPR_BOOKE_DBSR); tcg_gen_ori_tl(t0, t0, dbsr); gen_store_spr(SPR_BOOKE_DBSR, t0); - return POWERPC_EXCP_DEBUG; + gen_helper_raise_exception(cpu_env, + tcg_constant_i32(POWERPC_EXCP_DEBUG)); + ctx->base.is_jmp = DISAS_NORETURN; } else { - return POWERPC_EXCP_TRACE; + if (!rfi_type) { /* BookS does not single step rfi type instructions */ + TCGv t0 = tcg_temp_new(); + tcg_gen_movi_tl(t0, ctx->cia); + gen_helper_book3s_trace(cpu_env, t0); + ctx->base.is_jmp = DISAS_NORETURN; + } } -} - -static void gen_debug_exception(DisasContext *ctx) -{ - gen_helper_raise_exception(cpu_env, tcg_constant_i32(gen_prep_dbgex(ctx))); - ctx->base.is_jmp = DISAS_NORETURN; +#endif } static inline void gen_inval_exception(DisasContext *ctx, uint32_t error) @@ -554,8 +561,9 @@ void spr_write_lr(DisasContext *ctx, int sprn, int gprn) tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]); } -/* CFAR */ #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +/* Debug facilities */ +/* CFAR */ void spr_read_cfar(DisasContext *ctx, int gprn, int sprn) { tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar); @@ -565,6 +573,26 @@ void spr_write_cfar(DisasContext *ctx, int sprn, int gprn) { tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]); } + +/* Breakpoint */ +void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn) +{ + translator_io_start(&ctx->base); + gen_helper_store_ciabr(cpu_env, cpu_gpr[gprn]); +} + +/* Watchpoint */ +void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn) +{ + translator_io_start(&ctx->base); + gen_helper_store_dawr0(cpu_env, cpu_gpr[gprn]); +} + +void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn) +{ + translator_io_start(&ctx->base); + gen_helper_store_dawrx0(cpu_env, cpu_gpr[gprn]); +} #endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */ /* CTR */ @@ -4182,7 +4210,7 @@ 
static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) static void gen_lookup_and_goto_ptr(DisasContext *ctx) { if (unlikely(ctx->singlestep_enabled)) { - gen_debug_exception(ctx); + gen_debug_exception(ctx, false); } else { /* * tcg_gen_lookup_and_goto_ptr will exit the TB if @@ -7408,8 +7436,9 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } /* Honor single stepping. */ - if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP) - && (nip <= 0x100 || nip > 0xf00)) { + if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)) { + bool rfi_type = false; + switch (is_jmp) { case DISAS_TOO_MANY: case DISAS_EXIT_UPDATE: @@ -7418,12 +7447,19 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) break; case DISAS_EXIT: case DISAS_CHAIN: + /* + * This is a heuristic, to put it kindly. The rfi class of + * instructions are among the few outside branches that change + * NIP without taking an interrupt. Single step trace interrupts + * do not fire on completion of these instructions. + */ + rfi_type = true; break; default: g_assert_not_reached(); } - gen_debug_exception(ctx); + gen_debug_exception(ctx, rfi_type); return; } diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc index 4ce02fd3a4..7ff7e1ec46 100644 --- a/target/ppc/translate/fixedpoint-impl.c.inc +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -71,7 +71,7 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed) { #if defined(TARGET_PPC64) TCGv ea; - TCGv_i64 low_addr_gpr, high_addr_gpr; + TCGv_i64 lo, hi; TCGv_i128 t16; REQUIRE_INSNS_FLAGS(ctx, 64BX); @@ -94,21 +94,21 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed) gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, a->ra, tcg_constant_tl(a->si)); - if (prefixed || !ctx->le_mode) { - low_addr_gpr = cpu_gpr[a->rt]; - high_addr_gpr = cpu_gpr[a->rt + 1]; + if (ctx->le_mode && prefixed) { + lo = cpu_gpr[a->rt]; + hi = cpu_gpr[a->rt + 1]; } else { - low_addr_gpr = cpu_gpr[a->rt + 1]; - high_addr_gpr = cpu_gpr[a->rt]; + lo = cpu_gpr[a->rt + 1]; + hi = cpu_gpr[a->rt]; } t16 = tcg_temp_new_i128(); if (store) { - tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr); + tcg_gen_concat_i64_i128(t16, lo, hi); tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128)); } else { tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128)); - tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16); + tcg_gen_extr_i128_i64(lo, hi, t16); } #else qemu_build_not_reached(); diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py index 79c607b0e7..a18610542e 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -255,8 +255,7 @@ class ReplayKernelNormal(ReplayKernelBase): kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0' - # icount is not good enough for PPC64 for complete boot yet - console_pattern = 'Kernel command line: %s' % kernel_command_line + console_pattern = 'VFS: Cannot open root device' self.run_rr(kernel_path, kernel_command_line, console_pattern) def test_ppc64_powernv(self): diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py index 680c314cfc..fc47874eda 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py @@ -150,16 +150,33 @@ class ReverseDebugging(LinuxKernelTest): self.check_pc(g, addr) logger.info('found 
position %x' % addr) - logger.info('seeking to the end (icount %s)' % (last_icount - 1)) - vm.qmp('replay-break', icount=last_icount - 1) - # continue - will return after pausing - g.cmd(b'c', b'T02thread:01;') + # visit the recorded instruction in forward order + logger.info('stepping forward') + for addr in steps: + self.check_pc(g, addr) + self.gdb_step(g) + logger.info('found position %x' % addr) + # set breakpoints for the instructions just stepped over logger.info('setting breakpoints') for addr in steps: # hardware breakpoint at addr with len=1 g.cmd(b'Z1,%x,1' % addr, b'OK') + # this may hit a breakpoint if first instructions are executed + # again + logger.info('continuing execution') + vm.qmp('replay-break', icount=last_icount - 1) + # continue - will return after pausing + # This could stop at the end and get a T02 return, or by + # re-executing one of the breakpoints and get a T05 return. + g.cmd(b'c') + if self.vm_get_icount(vm) == last_icount - 1: + logger.info('reached the end (icount %s)' % (last_icount - 1)) + else: + logger.info('hit a breakpoint again at %x (icount %s)' % + (self.get_pc(g), self.vm_get_icount(vm))) + logger.info('running reverse continue to reach %x' % steps[-1]) # reverse continue - will return after stopping at the breakpoint g.cmd(b'bc', b'T05thread:01;') @@ -216,3 +233,32 @@ class ReverseDebugging_AArch64(ReverseDebugging): self.reverse_debugging( args=('-kernel', kernel_path)) + +class ReverseDebugging_ppc64(ReverseDebugging): + """ + :avocado: tags=accel:tcg + """ + + REG_PC = 0x40 + + # unidentified gitlab timeout problem + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') + def test_ppc64_pseries(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + # SLOF branches back to its entry point, which causes this test + # to take the 'hit a breakpoint again' path. That's not a problem, + # just slightly different than the other machines. + self.endian_is_le = False + self.reverse_debugging() + + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') + def test_ppc64_powernv(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:powernv + """ + self.endian_is_le = False + self.reverse_debugging()
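
The ReverseDebugging_ppc64 tests added above reuse the generic flow of the base class: replay the recorded PCs forwards with single steps, plant hardware breakpoints on them, continue towards the end of the recording (possibly stopping early if one of those breakpoints is re-executed), then reverse-continue back to the last breakpoint. The following is a minimal sketch of that flow, not part of the patch itself; the base-class helpers (gdb_step, check_pc, vm_get_icount) are passed in as parameters only to keep the sketch self-contained, and the gdb remote commands (Z1, c, bc) and the replay-break QMP command are the ones already used by the test above.

    def forward_then_reverse(g, vm, steps, last_icount,
                             gdb_step, check_pc, vm_get_icount, logger):
        # walk the recorded PCs in forward order, one gdb single-step at a time
        for addr in steps:
            check_pc(g, addr)
            gdb_step(g)
            logger.info('found position %x' % addr)

        # plant a hardware breakpoint (len=1) on every recorded PC
        for addr in steps:
            g.cmd(b'Z1,%x,1' % addr, b'OK')

        # ask the replay engine to pause just before the end of the recording,
        # then continue; this may stop earlier if a breakpoint is hit again
        vm.qmp('replay-break', icount=last_icount - 1)
        g.cmd(b'c')
        if vm_get_icount(vm) == last_icount - 1:
            logger.info('reached the end (icount %s)' % (last_icount - 1))
        else:
            logger.info('hit a breakpoint again (icount %s)' %
                        vm_get_icount(vm))

        # reverse continue back to the most recently executed breakpoint
        g.cmd(b'bc', b'T05thread:01;')

On the pseries machine SLOF branches back to its entry point, so the 'c' step tends to stop on one of the breakpoints rather than at icount last_icount - 1; the reverse continue still returns to the most recently executed breakpoint, which is why the test accepts either outcome.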