ppc/xive: introduce the XIVE interrupt thread context

Each POWER9 processor chip has a XIVE presenter that can generate four
different exceptions to its threads:

  - hypervisor exception,
  - O/S exception
  - Event-Based Branch (EBB)
  - msgsnd (doorbell).

Each exception has a state independent from the others called a Thread
Interrupt Management context. This context is a set of registers which
lets the thread handle priority management and interrupt acknowledgment
among other things. The most important ones being :

  - Interrupt Priority Register  (PIPR)
  - Interrupt Pending Buffer     (IPB)
  - Current Processor Priority   (CPPR)
  - Notification Source Register (NSR)

These registers are accessible through a specific MMIO region, called
the Thread Interrupt Management Area (TIMA), four aligned pages, each
exposing a different view of the registers. First page (page address
ending in 0b00) gives access to the entire context and is reserved for
the ring 0 view for the physical thread context. The second (page
address ending in 0b01) is for the hypervisor, ring 1 view. The third
(page address ending in 0b10) is for the operating system, ring 2
view. The fourth (page address ending in 0b11) is for user level, ring
3 view.

The thread interrupt context is modeled with a XiveTCTX object
containing the values of the different exception registers. The TIMA
region is mapped at the same address for each CPU.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
Cédric Le Goater 2018-12-09 20:45:53 +01:00 committed by David Gibson
parent 002686be42
commit 207d9fe985
3 changed files with 550 additions and 0 deletions

View File

@ -16,6 +16,429 @@
#include "hw/qdev-properties.h"
#include "monitor/monitor.h"
#include "hw/ppc/xive.h"
#include "hw/ppc/xive_regs.h"
/*
* XIVE Thread Interrupt Management context
*/
static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
{
return 0;
}
static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
{
if (cppr > XIVE_PRIORITY_MAX) {
cppr = 0xff;
}
tctx->regs[ring + TM_CPPR] = cppr;
}
/*
* XIVE Thread Interrupt Management Area (TIMA)
*/
/*
* Define an access map for each page of the TIMA that we will use in
* the memory region ops to filter values when doing loads and stores
* of raw registers values
*
* Registers accessibility bits :
*
* 0x0 - no access
* 0x1 - write only
* 0x2 - read only
* 0x3 - read/write
*/
static const uint8_t xive_tm_hw_view[] = {
/* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
/* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0,
/* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
/* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 0,
};
static const uint8_t xive_tm_hv_view[] = {
/* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
/* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0,
/* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0,
/* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 0, 0,
};
static const uint8_t xive_tm_os_view[] = {
/* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
/* QW-1 OS */ 2, 3, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
/* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const uint8_t xive_tm_user_view[] = {
/* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* QW-1 OS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
* Overall TIMA access map for the thread interrupt management context
* registers
*/
static const uint8_t *xive_tm_views[] = {
[XIVE_TM_HW_PAGE] = xive_tm_hw_view,
[XIVE_TM_HV_PAGE] = xive_tm_hv_view,
[XIVE_TM_OS_PAGE] = xive_tm_os_view,
[XIVE_TM_USER_PAGE] = xive_tm_user_view,
};
/*
* Computes a register access mask for a given offset in the TIMA
*/
static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
{
uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
uint8_t reg_offset = offset & 0x3F;
uint8_t reg_mask = write ? 0x1 : 0x2;
uint64_t mask = 0x0;
int i;
for (i = 0; i < size; i++) {
if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
mask |= (uint64_t) 0xff << (8 * (size - i - 1));
}
}
return mask;
}
static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
unsigned size)
{
uint8_t ring_offset = offset & 0x30;
uint8_t reg_offset = offset & 0x3F;
uint64_t mask = xive_tm_mask(offset, size, true);
int i;
/*
* Only 4 or 8 bytes stores are allowed and the User ring is
* excluded
*/
if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
HWADDR_PRIx"\n", offset);
return;
}
/*
* Use the register offset for the raw values and filter out
* reserved values
*/
for (i = 0; i < size; i++) {
uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
if (byte_mask) {
tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
byte_mask;
}
}
}
static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
{
uint8_t ring_offset = offset & 0x30;
uint8_t reg_offset = offset & 0x3F;
uint64_t mask = xive_tm_mask(offset, size, false);
uint64_t ret;
int i;
/*
* Only 4 or 8 bytes loads are allowed and the User ring is
* excluded
*/
if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
HWADDR_PRIx"\n", offset);
return -1;
}
/* Use the register offset for the raw values */
ret = 0;
for (i = 0; i < size; i++) {
ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
}
/* filter out reserved values */
return ret & mask;
}
/*
* The TM context is mapped twice within each page. Stores and loads
* to the first mapping below 2K write and read the specified values
* without modification. The second mapping above 2K performs specific
* state changes (side effects) in addition to setting/returning the
* interrupt management area context of the processor thread.
*/
static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
{
return xive_tctx_accept(tctx, TM_QW1_OS);
}
static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
uint64_t value, unsigned size)
{
xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
}
/*
* Define a mapping of "special" operations depending on the TIMA page
* offset and the size of the operation.
*/
typedef struct XiveTmOp {
uint8_t page_offset;
uint32_t op_offset;
unsigned size;
void (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value,
unsigned size);
uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size);
} XiveTmOp;
static const XiveTmOp xive_tm_operations[] = {
/*
* MMIOs below 2K : raw values and special operations without side
* effects
*/
{ XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL },
/* MMIOs above 2K : special operations with side effects */
{ XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg },
};
static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
{
uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
uint32_t op_offset = offset & 0xFFF;
int i;
for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
const XiveTmOp *xto = &xive_tm_operations[i];
/* Accesses done from a more privileged TIMA page is allowed */
if (xto->page_offset >= page_offset &&
xto->op_offset == op_offset &&
xto->size == size &&
((write && xto->write_handler) || (!write && xto->read_handler))) {
return xto;
}
}
return NULL;
}
/*
* TIMA MMIO handlers
*/
static void xive_tm_write(void *opaque, hwaddr offset,
uint64_t value, unsigned size)
{
PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
const XiveTmOp *xto;
/*
* TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
*/
/*
* First, check for special operations in the 2K region
*/
if (offset & 0x800) {
xto = xive_tm_find_op(offset, size, true);
if (!xto) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA"
"@%"HWADDR_PRIx"\n", offset);
} else {
xto->write_handler(tctx, offset, value, size);
}
return;
}
/*
* Then, for special operations in the region below 2K.
*/
xto = xive_tm_find_op(offset, size, true);
if (xto) {
xto->write_handler(tctx, offset, value, size);
return;
}
/*
* Finish with raw access to the register values
*/
xive_tm_raw_write(tctx, offset, value, size);
}
static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
{
PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
const XiveTmOp *xto;
/*
* TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
*/
/*
* First, check for special operations in the 2K region
*/
if (offset & 0x800) {
xto = xive_tm_find_op(offset, size, false);
if (!xto) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
"@%"HWADDR_PRIx"\n", offset);
return -1;
}
return xto->read_handler(tctx, offset, size);
}
/*
* Then, for special operations in the region below 2K.
*/
xto = xive_tm_find_op(offset, size, false);
if (xto) {
return xto->read_handler(tctx, offset, size);
}
/*
* Finish with raw access to the register values
*/
return xive_tm_raw_read(tctx, offset, size);
}
const MemoryRegionOps xive_tm_ops = {
.read = xive_tm_read,
.write = xive_tm_write,
.endianness = DEVICE_BIG_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
},
.impl = {
.min_access_size = 1,
.max_access_size = 8,
},
};
static inline uint32_t xive_tctx_word2(uint8_t *ring)
{
return *((uint32_t *) &ring[TM_WORD2]);
}
static char *xive_tctx_ring_print(uint8_t *ring)
{
uint32_t w2 = xive_tctx_word2(ring);
return g_strdup_printf("%02x %02x %02x %02x %02x "
"%02x %02x %02x %08x",
ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
be32_to_cpu(w2));
}
static const char * const xive_tctx_ring_names[] = {
"USER", "OS", "POOL", "PHYS",
};
void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
{
int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
int i;
monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
" W2\n", cpu_index);
for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
monitor_printf(mon, "CPU[%04x]: %4s %s\n", cpu_index,
xive_tctx_ring_names[i], s);
g_free(s);
}
}
static void xive_tctx_reset(void *dev)
{
XiveTCTX *tctx = XIVE_TCTX(dev);
memset(tctx->regs, 0, sizeof(tctx->regs));
/* Set some defaults */
tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
}
static void xive_tctx_realize(DeviceState *dev, Error **errp)
{
XiveTCTX *tctx = XIVE_TCTX(dev);
PowerPCCPU *cpu;
CPUPPCState *env;
Object *obj;
Error *local_err = NULL;
obj = object_property_get_link(OBJECT(dev), "cpu", &local_err);
if (!obj) {
error_propagate(errp, local_err);
error_prepend(errp, "required link 'cpu' not found: ");
return;
}
cpu = POWERPC_CPU(obj);
tctx->cs = CPU(obj);
env = &cpu->env;
switch (PPC_INPUT(env)) {
case PPC_FLAGS_INPUT_POWER7:
tctx->output = env->irq_inputs[POWER7_INPUT_INT];
break;
default:
error_setg(errp, "XIVE interrupt controller does not support "
"this CPU bus model");
return;
}
qemu_register_reset(xive_tctx_reset, dev);
}
static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
{
qemu_unregister_reset(xive_tctx_reset, dev);
}
static const VMStateDescription vmstate_xive_tctx = {
.name = TYPE_XIVE_TCTX,
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_BUFFER(regs, XiveTCTX),
VMSTATE_END_OF_LIST()
},
};
static void xive_tctx_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->desc = "XIVE Interrupt Thread Context";
dc->realize = xive_tctx_realize;
dc->unrealize = xive_tctx_unrealize;
dc->vmsd = &vmstate_xive_tctx;
}
static const TypeInfo xive_tctx_info = {
.name = TYPE_XIVE_TCTX,
.parent = TYPE_DEVICE,
.instance_size = sizeof(XiveTCTX),
.class_init = xive_tctx_class_init,
};
/*
* XIVE ESB helpers
@ -864,6 +1287,7 @@ static void xive_register_types(void)
type_register_static(&xive_fabric_info);
type_register_static(&xive_router_info);
type_register_static(&xive_end_source_info);
type_register_static(&xive_tctx_info);
}
type_init(xive_register_types)

View File

@ -367,4 +367,48 @@ typedef struct XiveENDSource {
void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon);
/*
* XIVE Thread interrupt Management (TM) context
*/
#define TYPE_XIVE_TCTX "xive-tctx"
#define XIVE_TCTX(obj) OBJECT_CHECK(XiveTCTX, (obj), TYPE_XIVE_TCTX)
/*
* XIVE Thread interrupt Management register rings :
*
* QW-0 User event-based exception state
* QW-1 O/S OS context for priority management, interrupt acks
* QW-2 Pool hypervisor pool context for virtual processors dispatched
* QW-3 Physical physical thread context and security context
*/
#define XIVE_TM_RING_COUNT 4
#define XIVE_TM_RING_SIZE 0x10
typedef struct XiveTCTX {
DeviceState parent_obj;
CPUState *cs;
qemu_irq output;
uint8_t regs[XIVE_TM_RING_COUNT * XIVE_TM_RING_SIZE];
} XiveTCTX;
/*
* XIVE Thread Interrupt Management Aera (TIMA)
*
* This region gives access to the registers of the thread interrupt
* management context. It is four page wide, each page providing a
* different view of the registers. The page with the lower offset is
* the most privileged and gives access to the entire context.
*/
#define XIVE_TM_HW_PAGE 0x0
#define XIVE_TM_HV_PAGE 0x1
#define XIVE_TM_OS_PAGE 0x2
#define XIVE_TM_USER_PAGE 0x3
extern const MemoryRegionOps xive_tm_ops;
void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon);
#endif /* PPC_XIVE_H */

View File

@ -23,6 +23,88 @@
#define XIVE_SRCNO_INDEX(srcno) ((srcno) & 0x0fffffff)
#define XIVE_SRCNO(blk, idx) ((uint32_t)(blk) << 28 | (idx))
#define TM_SHIFT 16
/* TM register offsets */
#define TM_QW0_USER 0x000 /* All rings */
#define TM_QW1_OS 0x010 /* Ring 0..2 */
#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
#define TM_NSR 0x0 /* + + - + */
#define TM_CPPR 0x1 /* - + - + */
#define TM_IPB 0x2 /* - + + + */
#define TM_LSMFB 0x3 /* - + + + */
#define TM_ACK_CNT 0x4 /* - + - - */
#define TM_INC 0x5 /* - + - + */
#define TM_AGE 0x6 /* - + - + */
#define TM_PIPR 0x7 /* - + - + */
#define TM_WORD0 0x0
#define TM_WORD1 0x4
/*
* QW word 2 contains the valid bit at the top and other fields
* depending on the QW.
*/
#define TM_WORD2 0x8
#define TM_QW0W2_VU PPC_BIT32(0)
#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1, 31) /* XX 2,31 ? */
#define TM_QW1W2_VO PPC_BIT32(0)
#define TM_QW1W2_OS_CAM PPC_BITMASK32(8, 31)
#define TM_QW2W2_VP PPC_BIT32(0)
#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8, 31)
#define TM_QW3W2_VT PPC_BIT32(0)
#define TM_QW3W2_LP PPC_BIT32(6)
#define TM_QW3W2_LE PPC_BIT32(7)
#define TM_QW3W2_T PPC_BIT32(31)
/*
* In addition to normal loads to "peek" and writes (only when invalid)
* using 4 and 8 bytes accesses, the above registers support these
* "special" byte operations:
*
* - Byte load from QW0[NSR] - User level NSR (EBB)
* - Byte store to QW0[NSR] - User level NSR (EBB)
* - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
* - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
* otherwise VT||0000000
* - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
*
* Then we have all these "special" CI ops at these offset that trigger
* all sorts of side effects:
*/
#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/
#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */
#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user
* context */
#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS
* context to reg */
#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool
* context to reg*/
#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd
* line */
#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even
* line */
#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
/* XXX more... */
/* NSR fields for the various QW ack types */
#define TM_QW0_NSR_EB PPC_BIT8(0)
#define TM_QW1_NSR_EO PPC_BIT8(0)
#define TM_QW3_NSR_HE PPC_BITMASK8(0, 1)
#define TM_QW3_NSR_HE_NONE 0
#define TM_QW3_NSR_HE_POOL 1
#define TM_QW3_NSR_HE_PHYS 2
#define TM_QW3_NSR_HE_LSI 3
#define TM_QW3_NSR_I PPC_BIT8(2)
#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3, 7)
/*
* EAS (Event Assignment Structure)
*