pseries: Implement qemu initiated shutdowns using EPOW events

At present, using 'system_powerdown' from the monitor or otherwise
instructing qemu to (cleanly) shut down a pseries guest will not work,
because we did not have a method of signalling the shutdown request to the
guest.

PAPR does include a usable mechanism for this, though it is rather more
involved than the equivalent on x86.  This involves sending an EPOW
(Environmental and POwer Warning) event through the PAPR event and error
logging mechanism, which also has a number of other functions.

This patch implements just enough of the event/error logging functionality
to be able to send a shutdown event to the guest.  At least with modern
guest kernels and a userspace that is up and running, this means that
system_powerdown from the qemu monitor should now work correctly on pseries
guests.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
This commit is contained in:
David Gibson 2012-10-08 18:17:39 +00:00 committed by Alexander Graf
parent 1bfb37d1e0
commit 74d042e5ce
4 changed files with 342 additions and 2 deletions

View File

@ -11,6 +11,7 @@ obj-y += ppc_newworld.o
obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o
obj-$(CONFIG_PSERIES) += spapr_events.o
# PowerPC 4xx boards # PowerPC 4xx boards
obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
obj-y += ppc440_bamboo.o obj-y += ppc440_bamboo.o

View File

@ -232,7 +232,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
hwaddr initrd_size, hwaddr initrd_size,
hwaddr kernel_size, hwaddr kernel_size,
const char *boot_device, const char *boot_device,
const char *kernel_cmdline) const char *kernel_cmdline,
uint32_t epow_irq)
{ {
void *fdt; void *fdt;
CPUPPCState *env; CPUPPCState *env;
@ -403,6 +404,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
_FDT((fdt_property(fdt, "ibm,associativity-reference-points", _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
refpoints, sizeof(refpoints)))); refpoints, sizeof(refpoints))));
_FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
_FDT((fdt_end_node(fdt))); _FDT((fdt_end_node(fdt)));
/* interrupt controller */ /* interrupt controller */
@ -433,6 +436,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
_FDT((fdt_end_node(fdt))); _FDT((fdt_end_node(fdt)));
/* event-sources */
spapr_events_fdt_skel(fdt, epow_irq);
_FDT((fdt_end_node(fdt))); /* close root node */ _FDT((fdt_end_node(fdt))); /* close root node */
_FDT((fdt_finish(fdt))); _FDT((fdt_finish(fdt)));
@ -795,6 +801,9 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
spapr->icp = xics_system_init(XICS_IRQS); spapr->icp = xics_system_init(XICS_IRQS);
spapr->next_irq = 16; spapr->next_irq = 16;
/* Set up EPOW events infrastructure */
spapr_events_init(spapr);
/* Set up IOMMU */ /* Set up IOMMU */
spapr_iommu_init(); spapr_iommu_init();
@ -903,7 +912,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
initrd_base, initrd_size, initrd_base, initrd_size,
kernel_size, kernel_size,
boot_device, kernel_cmdline); boot_device, kernel_cmdline,
spapr->epow_irq);
assert(spapr->fdt_skel != NULL); assert(spapr->fdt_skel != NULL);
} }

View File

@ -26,6 +26,9 @@ typedef struct sPAPREnvironment {
int rtc_offset; int rtc_offset;
char *cpu_model; char *cpu_model;
bool has_graphics; bool has_graphics;
uint32_t epow_irq;
Notifier epow_notifier;
} sPAPREnvironment; } sPAPREnvironment;
#define H_SUCCESS 0 #define H_SUCCESS 0
@ -335,7 +338,12 @@ typedef struct sPAPRTCE {
#define SPAPR_VIO_BASE_LIOBN 0x00000000 #define SPAPR_VIO_BASE_LIOBN 0x00000000
#define SPAPR_PCI_BASE_LIOBN 0x80000000 #define SPAPR_PCI_BASE_LIOBN 0x80000000
#define RTAS_ERROR_LOG_MAX 2048
void spapr_iommu_init(void); void spapr_iommu_init(void);
void spapr_events_init(sPAPREnvironment *spapr);
void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size); DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
void spapr_tce_free(DMAContext *dma); void spapr_tce_free(DMAContext *dma);
void spapr_tce_reset(DMAContext *dma); void spapr_tce_reset(DMAContext *dma);

321
hw/spapr_events.c Normal file
View File

@ -0,0 +1,321 @@
/*
* QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
*
* RTAS events handling
*
* Copyright (c) 2012 David Gibson, IBM Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*/
#include "cpu.h"
#include "sysemu.h"
#include "qemu-char.h"
#include "hw/qdev.h"
#include "device_tree.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"
#include <libfdt.h>
struct rtas_error_log {
uint32_t summary;
#define RTAS_LOG_VERSION_MASK 0xff000000
#define RTAS_LOG_VERSION_6 0x06000000
#define RTAS_LOG_SEVERITY_MASK 0x00e00000
#define RTAS_LOG_SEVERITY_ALREADY_REPORTED 0x00c00000
#define RTAS_LOG_SEVERITY_FATAL 0x00a00000
#define RTAS_LOG_SEVERITY_ERROR 0x00800000
#define RTAS_LOG_SEVERITY_ERROR_SYNC 0x00600000
#define RTAS_LOG_SEVERITY_WARNING 0x00400000
#define RTAS_LOG_SEVERITY_EVENT 0x00200000
#define RTAS_LOG_SEVERITY_NO_ERROR 0x00000000
#define RTAS_LOG_DISPOSITION_MASK 0x00180000
#define RTAS_LOG_DISPOSITION_FULLY_RECOVERED 0x00000000
#define RTAS_LOG_DISPOSITION_LIMITED_RECOVERY 0x00080000
#define RTAS_LOG_DISPOSITION_NOT_RECOVERED 0x00100000
#define RTAS_LOG_OPTIONAL_PART_PRESENT 0x00040000
#define RTAS_LOG_INITIATOR_MASK 0x0000f000
#define RTAS_LOG_INITIATOR_UNKNOWN 0x00000000
#define RTAS_LOG_INITIATOR_CPU 0x00001000
#define RTAS_LOG_INITIATOR_PCI 0x00002000
#define RTAS_LOG_INITIATOR_MEMORY 0x00004000
#define RTAS_LOG_INITIATOR_HOTPLUG 0x00006000
#define RTAS_LOG_TARGET_MASK 0x00000f00
#define RTAS_LOG_TARGET_UNKNOWN 0x00000000
#define RTAS_LOG_TARGET_CPU 0x00000100
#define RTAS_LOG_TARGET_PCI 0x00000200
#define RTAS_LOG_TARGET_MEMORY 0x00000400
#define RTAS_LOG_TARGET_HOTPLUG 0x00000600
#define RTAS_LOG_TYPE_MASK 0x000000ff
#define RTAS_LOG_TYPE_OTHER 0x00000000
#define RTAS_LOG_TYPE_RETRY 0x00000001
#define RTAS_LOG_TYPE_TCE_ERR 0x00000002
#define RTAS_LOG_TYPE_INTERN_DEV_FAIL 0x00000003
#define RTAS_LOG_TYPE_TIMEOUT 0x00000004
#define RTAS_LOG_TYPE_DATA_PARITY 0x00000005
#define RTAS_LOG_TYPE_ADDR_PARITY 0x00000006
#define RTAS_LOG_TYPE_CACHE_PARITY 0x00000007
#define RTAS_LOG_TYPE_ADDR_INVALID 0x00000008
#define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009
#define RTAS_LOG_TYPE_ECC_CORR 0x0000000a
#define RTAS_LOG_TYPE_EPOW 0x00000040
uint32_t extended_length;
} QEMU_PACKED;
struct rtas_event_log_v6 {
uint8_t b0;
#define RTAS_LOG_V6_B0_VALID 0x80
#define RTAS_LOG_V6_B0_UNRECOVERABLE_ERROR 0x40
#define RTAS_LOG_V6_B0_RECOVERABLE_ERROR 0x20
#define RTAS_LOG_V6_B0_DEGRADED_OPERATION 0x10
#define RTAS_LOG_V6_B0_PREDICTIVE_ERROR 0x08
#define RTAS_LOG_V6_B0_NEW_LOG 0x04
#define RTAS_LOG_V6_B0_BIGENDIAN 0x02
uint8_t _resv1;
uint8_t b2;
#define RTAS_LOG_V6_B2_POWERPC_FORMAT 0x80
#define RTAS_LOG_V6_B2_LOG_FORMAT_MASK 0x0f
#define RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT 0x0e
uint8_t _resv2[9];
uint32_t company;
#define RTAS_LOG_V6_COMPANY_IBM 0x49424d00 /* IBM<null> */
} QEMU_PACKED;
struct rtas_event_log_v6_section_header {
uint16_t section_id;
uint16_t section_length;
uint8_t section_version;
uint8_t section_subtype;
uint16_t creator_component_id;
} QEMU_PACKED;
struct rtas_event_log_v6_maina {
#define RTAS_LOG_V6_SECTION_ID_MAINA 0x5048 /* PH */
struct rtas_event_log_v6_section_header hdr;
uint32_t creation_date; /* BCD: YYYYMMDD */
uint32_t creation_time; /* BCD: HHMMSS00 */
uint8_t _platform1[8];
char creator_id;
uint8_t _resv1[2];
uint8_t section_count;
uint8_t _resv2[4];
uint8_t _platform2[8];
uint32_t plid;
uint8_t _platform3[4];
} QEMU_PACKED;
struct rtas_event_log_v6_mainb {
#define RTAS_LOG_V6_SECTION_ID_MAINB 0x5548 /* UH */
struct rtas_event_log_v6_section_header hdr;
uint8_t subsystem_id;
uint8_t _platform1;
uint8_t event_severity;
uint8_t event_subtype;
uint8_t _platform2[4];
uint8_t _resv1[2];
uint16_t action_flags;
uint8_t _resv2[4];
} QEMU_PACKED;
struct rtas_event_log_v6_epow {
#define RTAS_LOG_V6_SECTION_ID_EPOW 0x4550 /* EP */
struct rtas_event_log_v6_section_header hdr;
uint8_t sensor_value;
#define RTAS_LOG_V6_EPOW_ACTION_RESET 0
#define RTAS_LOG_V6_EPOW_ACTION_WARN_COOLING 1
#define RTAS_LOG_V6_EPOW_ACTION_WARN_POWER 2
#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN 3
#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_HALT 4
#define RTAS_LOG_V6_EPOW_ACTION_MAIN_ENCLOSURE 5
#define RTAS_LOG_V6_EPOW_ACTION_POWER_OFF 7
uint8_t event_modifier;
#define RTAS_LOG_V6_EPOW_MODIFIER_NORMAL 1
#define RTAS_LOG_V6_EPOW_MODIFIER_ON_UPS 2
#define RTAS_LOG_V6_EPOW_MODIFIER_CRITICAL 3
#define RTAS_LOG_V6_EPOW_MODIFIER_TEMPERATURE 4
uint8_t extended_modifier;
#define RTAS_LOG_V6_EPOW_XMODIFIER_SYSTEM_WIDE 0
#define RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC 1
uint8_t _resv;
uint64_t reason_code;
} QEMU_PACKED;
struct epow_log_full {
struct rtas_error_log hdr;
struct rtas_event_log_v6 v6hdr;
struct rtas_event_log_v6_maina maina;
struct rtas_event_log_v6_mainb mainb;
struct rtas_event_log_v6_epow epow;
} QEMU_PACKED;
#define EVENT_MASK_INTERNAL_ERRORS 0x80000000
#define EVENT_MASK_EPOW 0x40000000
#define EVENT_MASK_HOTPLUG 0x10000000
#define EVENT_MASK_IO 0x08000000
#define _FDT(exp) \
do { \
int ret = (exp); \
if (ret < 0) { \
fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
#exp, fdt_strerror(ret)); \
exit(1); \
} \
} while (0)
void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq)
{
uint32_t epow_irq_ranges[] = {cpu_to_be32(epow_irq), cpu_to_be32(1)};
uint32_t epow_interrupts[] = {cpu_to_be32(epow_irq), 0};
_FDT((fdt_begin_node(fdt, "event-sources")));
_FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
_FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
_FDT((fdt_property(fdt, "interrupt-ranges",
epow_irq_ranges, sizeof(epow_irq_ranges))));
_FDT((fdt_begin_node(fdt, "epow-events")));
_FDT((fdt_property(fdt, "interrupts",
epow_interrupts, sizeof(epow_interrupts))));
_FDT((fdt_end_node(fdt)));
_FDT((fdt_end_node(fdt)));
}
static struct epow_log_full *pending_epow;
static uint32_t next_plid;
static void spapr_powerdown_req(Notifier *n, void *opaque)
{
sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
struct rtas_error_log *hdr;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
struct rtas_event_log_v6_mainb *mainb;
struct rtas_event_log_v6_epow *epow;
struct tm tm;
int year;
if (pending_epow) {
/* For now, we just throw away earlier events if two come
* along before any are consumed. This is sufficient for our
* powerdown messages, but we'll need more if we do more
* general error/event logging */
g_free(pending_epow);
}
pending_epow = g_malloc0(sizeof(*pending_epow));
hdr = &pending_epow->hdr;
v6hdr = &pending_epow->v6hdr;
maina = &pending_epow->maina;
mainb = &pending_epow->mainb;
epow = &pending_epow->epow;
hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_TYPE_EPOW);
hdr->extended_length = cpu_to_be32(sizeof(*pending_epow)
- sizeof(pending_epow->hdr));
v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG
| RTAS_LOG_V6_B0_BIGENDIAN;
v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT
| RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT;
v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM);
maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
/* FIXME: section version, subtype and creator id? */
qemu_get_timedate(&tm, spapr->rtc_offset);
year = tm.tm_year + 1900;
maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24)
| (to_bcd(year % 100) << 16)
| (to_bcd(tm.tm_mon + 1) << 8)
| to_bcd(tm.tm_mday));
maina->creation_time = cpu_to_be32((to_bcd(tm.tm_hour) << 24)
| (to_bcd(tm.tm_min) << 16)
| (to_bcd(tm.tm_sec) << 8));
maina->creator_id = 'H'; /* Hypervisor */
maina->section_count = 3; /* Main-A, Main-B and EPOW */
maina->plid = next_plid++;
mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
/* FIXME: section version, subtype and creator id? */
mainb->subsystem_id = 0xa0; /* External environment */
mainb->event_severity = 0x00; /* Informational / non-error */
mainb->event_subtype = 0xd0; /* Normal shutdown */
epow->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_EPOW);
epow->hdr.section_length = cpu_to_be16(sizeof(*epow));
epow->hdr.section_version = 2; /* includes extended modifier */
/* FIXME: section subtype and creator id? */
epow->sensor_value = RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN;
epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->epow_irq));
}
static void check_exception(sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
{
uint32_t mask, buf, len;
uint64_t xinfo;
if ((nargs < 6) || (nargs > 7) || nret != 1) {
rtas_st(rets, 0, -3);
return;
}
xinfo = rtas_ld(args, 1);
mask = rtas_ld(args, 2);
buf = rtas_ld(args, 4);
len = rtas_ld(args, 5);
if (nargs == 7) {
xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
}
if ((mask & EVENT_MASK_EPOW) && pending_epow) {
if (sizeof(*pending_epow) < len) {
len = sizeof(*pending_epow);
}
cpu_physical_memory_write(buf, pending_epow, len);
g_free(pending_epow);
pending_epow = NULL;
rtas_st(rets, 0, 0);
} else {
rtas_st(rets, 0, 1);
}
}
void spapr_events_init(sPAPREnvironment *spapr)
{
spapr->epow_irq = spapr_allocate_msi(0);
spapr->epow_notifier.notify = spapr_powerdown_req;
qemu_register_powerdown_notifier(&spapr->epow_notifier);
spapr_rtas_register("check-exception", check_exception);
}