Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

The main thing here is a new implementation of the in-kernel
XICS interrupt controller emulation for POWER9 machines, from Ben
Herrenschmidt.

POWER9 has a new interrupt controller called XIVE (eXternal Interrupt
Virtualization Engine) which is able to deliver interrupts directly
to guest virtual CPUs in hardware without hypervisor intervention.
With this new code, the guest still sees the old XICS interface but
performance is better because the XICS emulation in the host uses the
XIVE directly rather than going through a XICS emulation in firmware.

Conflicts:
	arch/powerpc/kernel/cpu_setup_power.S [cherry-picked fix]
	arch/powerpc/kvm/book3s_xive.c [include asm/debugfs.h]
This commit is contained in:
Paolo Bonzini 2017-05-09 11:50:01 +02:00
commit 4415b33528
26 changed files with 3359 additions and 89 deletions

View File

@ -111,6 +111,8 @@ struct kvmppc_host_state {
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
void __iomem *xics_phys;
void __iomem *xive_tima_phys;
void __iomem *xive_tima_virt;
u32 saved_xirr;
u64 dabr;
u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */

View File

@ -210,6 +210,12 @@ struct kvmppc_spapr_tce_table {
/* XICS components, defined in book3s_xics.c */
struct kvmppc_xics;
struct kvmppc_icp;
extern struct kvm_device_ops kvm_xics_ops;
/* XIVE components, defined in book3s_xive.c */
struct kvmppc_xive;
struct kvmppc_xive_vcpu;
extern struct kvm_device_ops kvm_xive_ops;
struct kvmppc_passthru_irqmap;
@ -298,6 +304,7 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
struct kvmppc_xive *xive;
struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
@ -427,7 +434,7 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2
#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
#define MMIO_HPTE_CACHE_SIZE 4
@ -454,6 +461,21 @@ struct mmio_hpte_cache {
struct openpic;
/* W0 and W1 of a XIVE thread management context */
union xive_tma_w01 {
struct {
u8 nsr;
u8 cppr;
u8 ipb;
u8 lsmfb;
u8 ack;
u8 inc;
u8 age;
u8 pipr;
};
__be64 w01;
};
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@ -714,6 +736,10 @@ struct kvm_vcpu_arch {
struct openpic *mpic; /* KVM_IRQ_MPIC */
#ifdef CONFIG_KVM_XICS
struct kvmppc_icp *icp; /* XICS presentation controller */
struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
__be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE

View File

@ -240,6 +240,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority);
extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
@ -428,6 +429,14 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
}
static inline void kvmppc_set_xive_tima(int cpu,
unsigned long phys_addr,
void __iomem *virt_addr)
{
paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
}
static inline u32 kvmppc_get_xics_latch(void)
{
u32 xirr;
@ -458,6 +467,11 @@ static inline void __init kvm_cma_reserve(void)
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{}
static inline void kvmppc_set_xive_tima(int cpu,
unsigned long phys_addr,
void __iomem *virt_addr)
{}
static inline u32 kvmppc_get_xics_latch(void)
{
return 0;
@ -508,6 +522,10 @@ extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
struct kvmppc_irq_map *irq_map,
struct kvmppc_passthru_irqmap *pimap,
bool *again);
extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern int h_ipi_redirect;
#else
static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@ -525,6 +543,60 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
/*
* Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
* ie. P9 new interrupt controller, while the second "xive" is the legacy
* "eXternal Interrupt Vector Entry" which is the configuration of an
* interrupt on the "xics" interrupt controller on P8 and earlier. Those
* two function consume or produce a legacy "XIVE" state from the
* new "XIVE" interrupt controller.
*/
extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority);
extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
extern void kvmppc_xive_init_module(void);
extern void kvmppc_xive_exit_module(void);
extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
struct irq_desc *host_desc);
extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
struct irq_desc *host_desc);
extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority) { return -1; }
static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
static inline void kvmppc_xive_init_module(void) { }
static inline void kvmppc_xive_exit_module(void) { }
static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
struct irq_desc *host_desc) { return -ENODEV; }
static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
struct irq_desc *host_desc) { return -ENODEV; }
static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
#endif /* CONFIG_KVM_XIVE */
/*
* Prototypes for functions called only from assembler code.
* Having prototypes reduces sparse errors.
@ -562,6 +634,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
unsigned long slb_v, unsigned int status, bool data);
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);

View File

@ -99,7 +99,6 @@ struct xive_q {
#define XIVE_ESB_SET_PQ_01 0xd00
#define XIVE_ESB_SET_PQ_10 0xe00
#define XIVE_ESB_SET_PQ_11 0xf00
#define XIVE_ESB_MASK XIVE_ESB_SET_PQ_01
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
@ -136,11 +135,11 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
__be32 *qpage, u32 order, bool can_escalate);
extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern bool __xive_irq_trigger(struct xive_irq_data *xd);
extern bool __xive_irq_retrigger(struct xive_irq_data *xd);
extern void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd);
extern void xive_native_sync_source(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
#else

View File

@ -634,6 +634,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
@ -719,6 +721,14 @@ int main(void)
OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
#endif
#ifdef CONFIG_KVM_XICS
DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
arch.xive_saved_state));
DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
arch.xive_cam_word));
DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);

View File

@ -197,6 +197,11 @@ config KVM_XICS
Specification) interrupt controller architecture used on
IBM POWER (pSeries) servers.
config KVM_XIVE
bool
default y
depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
source drivers/vhost/Kconfig
endif # VIRTUALIZATION

View File

@ -74,7 +74,7 @@ kvm-hv-y += \
book3s_64_mmu_radix.o
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o
book3s_hv_rm_xics.o book3s_hv_rm_xive.o
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
@ -89,6 +89,8 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
kvm-book3s_64-module-objs := \
$(common-objs-y) \
book3s.o \

View File

@ -35,6 +35,7 @@
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/xive.h>
#include "book3s.h"
#include "trace.h"
@ -596,11 +597,14 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
break;
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
r = -ENXIO;
break;
}
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
if (xive_enabled())
*val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
else
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
case KVM_REG_PPC_FSCR:
@ -666,12 +670,14 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
#endif /* CONFIG_VSX */
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
r = -ENXIO;
break;
}
r = kvmppc_xics_set_icp(vcpu,
set_reg_val(id, *val));
if (xive_enabled())
r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
else
r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
case KVM_REG_PPC_FSCR:
@ -942,6 +948,50 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
return kvm->arch.kvm_ops->hcall_implemented(hcall);
}
#ifdef CONFIG_KVM_XICS
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
if (xive_enabled())
return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
line_status);
else
return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
line_status);
}
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
struct kvm *kvm, int irq_source_id,
int level, bool line_status)
{
return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
level, line_status);
}
static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
}
int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
entries->gsi = gsi;
entries->type = KVM_IRQ_ROUTING_IRQCHIP;
entries->set = kvmppc_book3s_set_irq;
entries->irqchip.irqchip = 0;
entries->irqchip.pin = gsi;
return 1;
}
int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
return pin;
}
#endif /* CONFIG_KVM_XICS */
static int kvmppc_book3s_init(void)
{
int r;
@ -952,12 +1002,25 @@ static int kvmppc_book3s_init(void)
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
r = kvmppc_book3s_init_pr();
#endif
return r;
#ifdef CONFIG_KVM_XICS
#ifdef CONFIG_KVM_XIVE
if (xive_enabled()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
#endif
return r;
}
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_XICS
if (xive_enabled())
kvmppc_xive_exit_module();
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
#endif

View File

@ -67,6 +67,7 @@
#include <asm/mmu.h>
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include "book3s.h"
@ -837,6 +838,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_IPOLL:
case H_XIRR_X:
if (kvmppc_xics_enabled(vcpu)) {
if (xive_enabled()) {
ret = H_NOT_AVAILABLE;
return RESUME_GUEST;
}
ret = kvmppc_xics_hcall(vcpu, req);
break;
}
@ -2947,8 +2952,12 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
r = kvmppc_book3s_hv_page_fault(run, vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
} else if (r == RESUME_PASSTHROUGH)
r = kvmppc_xics_rm_complete(vcpu, 0);
} else if (r == RESUME_PASSTHROUGH) {
if (WARN_ON(xive_enabled()))
r = H_SUCCESS;
else
r = kvmppc_xics_rm_complete(vcpu, 0);
}
} while (is_kvmppc_resume_guest(r));
out:
@ -3400,10 +3409,20 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
* Set HVICE bit to enable hypervisor virtualization interrupts.
* Set HEIC to prevent OS interrupts to go to hypervisor (should
* be unnecessary but better safe than sorry in case we re-enable
* EE in HV mode with this LPCR still set)
*/
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
lpcr &= ~LPCR_VPM0;
lpcr |= LPCR_HVICE;
lpcr |= LPCR_HVICE | LPCR_HEIC;
/*
* If xive is enabled, we route 0x500 interrupts directly
* to the guest.
*/
if (xive_enabled())
lpcr |= LPCR_LPES;
}
/*
@ -3533,7 +3552,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
struct kvmppc_irq_map *irq_map;
struct kvmppc_passthru_irqmap *pimap;
struct irq_chip *chip;
int i;
int i, rc = 0;
if (!kvm_irq_bypass)
return 1;
@ -3558,10 +3577,10 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
/*
* For now, we only support interrupts for which the EOI operation
* is an OPAL call followed by a write to XIRR, since that's
* what our real-mode EOI code does.
* what our real-mode EOI code does, or a XIVE interrupt
*/
chip = irq_data_get_irq_chip(&desc->irq_data);
if (!chip || !is_pnv_opal_msi(chip)) {
if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
host_irq, guest_gsi);
mutex_unlock(&kvm->lock);
@ -3603,7 +3622,12 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
if (i == pimap->n_mapped)
pimap->n_mapped++;
kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
if (xive_enabled())
rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
else
kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
if (rc)
irq_map->r_hwirq = 0;
mutex_unlock(&kvm->lock);
@ -3614,7 +3638,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
{
struct irq_desc *desc;
struct kvmppc_passthru_irqmap *pimap;
int i;
int i, rc = 0;
if (!kvm_irq_bypass)
return 0;
@ -3639,9 +3663,12 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
return -ENODEV;
}
kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
if (xive_enabled())
rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
else
kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
/* invalidate the entry */
/* invalidate the entry (what do do on error from the above ?) */
pimap->mapped[i].r_hwirq = 0;
/*
@ -3650,7 +3677,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
*/
unlock:
mutex_unlock(&kvm->lock);
return 0;
return rc;
}
static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
@ -3928,7 +3955,7 @@ static int kvmppc_book3s_init_hv(void)
* indirectly, via OPAL.
*/
#ifdef CONFIG_SMP
if (!get_paca()->kvm_hstate.xics_phys) {
if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");

View File

@ -32,6 +32,24 @@
#define KVM_CMA_CHUNK_ORDER 18
#include "book3s_xics.h"
#include "book3s_xive.h"
/*
* The XIVE module will populate these when it loads
*/
unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
@ -211,6 +229,7 @@ void kvmhv_rm_send_ipi(int cpu)
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
return;
}
/* On POWER8 for IPIs to threads in the same core, use msgsnd. */
if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
cpu_first_thread_sibling(cpu) ==
@ -407,6 +426,9 @@ static long kvmppc_read_one_intr(bool *again)
u8 host_ipi;
int64_t rc;
if (xive_enabled())
return 1;
/* see if a host IPI is pending */
host_ipi = local_paca->kvm_hstate.host_ipi;
if (host_ipi)
@ -491,3 +513,84 @@ static long kvmppc_read_one_intr(bool *again)
return kvmppc_check_passthru(xisr, xirr, again);
}
#ifdef CONFIG_KVM_XICS
static inline bool is_rm(void)
{
return !(mfmsr() & MSR_DR);
}
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_xirr(vcpu);
if (unlikely(!__xive_vm_h_xirr))
return H_NOT_AVAILABLE;
return __xive_vm_h_xirr(vcpu);
} else
return xics_rm_h_xirr(vcpu);
}
unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
{
vcpu->arch.gpr[5] = get_tb();
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_xirr(vcpu);
if (unlikely(!__xive_vm_h_xirr))
return H_NOT_AVAILABLE;
return __xive_vm_h_xirr(vcpu);
} else
return xics_rm_h_xirr(vcpu);
}
unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_ipoll(vcpu, server);
if (unlikely(!__xive_vm_h_ipoll))
return H_NOT_AVAILABLE;
return __xive_vm_h_ipoll(vcpu, server);
} else
return H_TOO_HARD;
}
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_ipi(vcpu, server, mfrr);
if (unlikely(!__xive_vm_h_ipi))
return H_NOT_AVAILABLE;
return __xive_vm_h_ipi(vcpu, server, mfrr);
} else
return xics_rm_h_ipi(vcpu, server, mfrr);
}
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_cppr(vcpu, cppr);
if (unlikely(!__xive_vm_h_cppr))
return H_NOT_AVAILABLE;
return __xive_vm_h_cppr(vcpu, cppr);
} else
return xics_rm_h_cppr(vcpu, cppr);
}
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_eoi(vcpu, xirr);
if (unlikely(!__xive_vm_h_eoi))
return H_NOT_AVAILABLE;
return __xive_vm_h_eoi(vcpu, xirr);
} else
return xics_rm_h_eoi(vcpu, xirr);
}
#endif /* CONFIG_KVM_XICS */

View File

@ -484,7 +484,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
}
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@ -522,8 +522,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
return check_too_hard(xics, icp);
}
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@ -609,7 +609,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
return check_too_hard(xics, this_icp);
}
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@ -729,7 +729,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
return check_too_hard(xics, icp);
}
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;

View File

@ -0,0 +1,47 @@
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/kernel_stat.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/pgtable.h>
#include <asm/ppc-opcode.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
#include <asm/smp.h>
#include <asm/asm-prototypes.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include "book3s_xive.h"
/* XXX */
#include <asm/udbg.h>
//#define DBG(fmt...) udbg_printf(fmt)
#define DBG(fmt...) do { } while(0)
static inline void __iomem *get_tima_phys(void)
{
return local_paca->kvm_hstate.xive_tima_phys;
}
#undef XIVE_RUNTIME_CHECKS
#define X_PFX xive_rm_
#define X_STATIC
#define X_STAT_PFX stat_rm_
#define __x_tima get_tima_phys()
#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page))
#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page))
#define __x_readb __raw_rm_readb
#define __x_writeb __raw_rm_writeb
#define __x_readw __raw_rm_readw
#define __x_readq __raw_rm_readq
#define __x_writeq __raw_rm_writeq
#include "book3s_xive_template.c"

View File

@ -30,6 +30,7 @@
#include <asm/book3s/64/mmu-hash.h>
#include <asm/tm.h>
#include <asm/opal.h>
#include <asm/xive-regs.h>
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
@ -970,6 +971,23 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpwi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
cmpldi cr0, r10, r0
beq no_xive
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
stdcix r11,r9,r10
eieio
lwz r11, VCPU_XIVE_CAM_WORD(r4)
li r9, TM_QW1_OS + TM_WORD2
stwcix r11,r9,r10
li r9, 1
stw r9, VCPU_XIVE_PUSHED(r4)
no_xive:
#endif /* CONFIG_KVM_XICS */
deliver_guest_interrupt:
ld r6, VCPU_CTR(r4)
ld r7, VCPU_XER(r4)
@ -1307,6 +1325,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
blt deliver_guest_interrupt
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
#ifdef CONFIG_KVM_XICS
/* We are exiting, pull the VP from the XIVE */
lwz r0, VCPU_XIVE_PUSHED(r9)
cmpwi cr0, r0, 0
beq 1f
li r7, TM_SPC_PULL_OS_CTX
li r6, TM_QW1_OS
mfmsr r0
andi. r0, r0, MSR_IR /* in real mode? */
beq 2f
ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi cr0, r10, 0
beq 1f
/* First load to pull the context, we ignore the value */
lwzx r11, r7, r10
eieio
/* Second load to recover the context state (Words 0 and 1) */
ldx r11, r6, r10
b 3f
2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
cmpldi cr0, r10, 0
beq 1f
/* First load to pull the context, we ignore the value */
lwzcix r11, r7, r10
eieio
/* Second load to recover the context state (Words 0 and 1) */
ldcix r11, r6, r10
3: std r11, VCPU_XIVE_SAVED_STATE(r9)
/* Fixup some of the state for the next load */
li r10, 0
li r0, 0xff
stw r10, VCPU_XIVE_PUSHED(r9)
stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
1:
#endif /* CONFIG_KVM_XICS */
/* Save more register state */
mfdar r6
mfdsisr r7
@ -2011,7 +2065,7 @@ hcall_real_table:
.long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
.long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
.long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
.long 0 /* 0x70 - H_IPOLL */
.long DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
.long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
#else
.long 0 /* 0x64 - H_EOI */
@ -2181,7 +2235,11 @@ hcall_real_table:
.long 0 /* 0x2f0 */
.long 0 /* 0x2f4 */
.long 0 /* 0x2f8 */
.long 0 /* 0x2fc */
#ifdef CONFIG_KVM_XICS
.long DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
#else
.long 0 /* 0x2fc - H_XIRR_X*/
#endif
.long DOTSYM(kvmppc_h_random) - hcall_real_table
.globl hcall_real_table_end
hcall_real_table_end:

View File

@ -16,6 +16,7 @@
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/rtas.h>
#include <asm/xive.h>
#ifdef CONFIG_KVM_XICS
static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@ -32,7 +33,10 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
server = be32_to_cpu(args->args[1]);
priority = be32_to_cpu(args->args[2]);
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
if (xive_enabled())
rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
else
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
if (rc)
rc = -3;
out:
@ -52,7 +56,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]);
server = priority = 0;
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
if (xive_enabled())
rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
else
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
if (rc) {
rc = -3;
goto out;
@ -76,7 +83,10 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]);
rc = kvmppc_xics_int_off(vcpu->kvm, irq);
if (xive_enabled())
rc = kvmppc_xive_int_off(vcpu->kvm, irq);
else
rc = kvmppc_xics_int_off(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
@ -95,7 +105,10 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]);
rc = kvmppc_xics_int_on(vcpu->kvm, irq);
if (xive_enabled())
rc = kvmppc_xive_int_on(vcpu->kvm, irq);
else
rc = kvmppc_xics_int_on(vcpu->kvm, irq);
if (rc)
rc = -3;
out:

View File

@ -1306,8 +1306,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
return 0;
}
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
struct kvmppc_xics *xics = kvm->arch.xics;
@ -1316,14 +1316,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
return ics_deliver_irq(xics, irq, level);
}
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
struct kvm *kvm, int irq_source_id,
int level, bool line_status)
{
return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
level, line_status);
}
static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xics *xics = dev->private;
@ -1457,29 +1449,6 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
}
static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
}
int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
entries->gsi = gsi;
entries->type = KVM_IRQ_ROUTING_IRQCHIP;
entries->set = xics_set_irq;
entries->irqchip.irqchip = 0;
entries->irqchip.pin = gsi;
return 1;
}
int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
return pin;
}
void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
unsigned long host_irq)
{

View File

@ -10,6 +10,7 @@
#ifndef _KVM_PPC_BOOK3S_XICS_H
#define _KVM_PPC_BOOK3S_XICS_H
#ifdef CONFIG_KVM_XICS
/*
* We use a two-level tree to store interrupt source information.
* There are up to 1024 ICS nodes, each of which can represent
@ -144,5 +145,11 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
return ics;
}
extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,256 @@
/*
* Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#ifndef _KVM_PPC_BOOK3S_XIVE_H
#define _KVM_PPC_BOOK3S_XIVE_H
#ifdef CONFIG_KVM_XICS
#include "book3s_xics.h"
/*
* State for one guest irq source.
*
* For each guest source we allocate a HW interrupt in the XIVE
* which we use for all SW triggers. It will be unused for
* pass-through but it's easier to keep around as the same
* guest interrupt can alternatively be emulated or pass-through
* if a physical device is hot unplugged and replaced with an
* emulated one.
*
* This state structure is very similar to the XICS one with
* additional XIVE specific tracking.
*/
struct kvmppc_xive_irq_state {
bool valid; /* Interrupt entry is valid */
u32 number; /* Guest IRQ number */
u32 ipi_number; /* XIVE IPI HW number */
struct xive_irq_data ipi_data; /* XIVE IPI associated data */
u32 pt_number; /* XIVE Pass-through number if any */
struct xive_irq_data *pt_data; /* XIVE Pass-through associated data */
/* Targetting as set by guest */
u32 guest_server; /* Current guest selected target */
u8 guest_priority; /* Guest set priority */
u8 saved_priority; /* Saved priority when masking */
/* Actual targetting */
u32 act_server; /* Actual server */
u8 act_priority; /* Actual priority */
/* Various state bits */
bool in_eoi; /* Synchronize with H_EOI */
bool old_p; /* P bit state when masking */
bool old_q; /* Q bit state when masking */
bool lsi; /* level-sensitive interrupt */
bool asserted; /* Only for emulated LSI: current state */
/* Saved for migration state */
bool in_queue;
bool saved_p;
bool saved_q;
u8 saved_scan_prio;
};
/* Select the "right" interrupt (IPI vs. passthrough) */
static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
u32 *out_hw_irq,
struct xive_irq_data **out_xd)
{
if (state->pt_number) {
if (out_hw_irq)
*out_hw_irq = state->pt_number;
if (out_xd)
*out_xd = state->pt_data;
} else {
if (out_hw_irq)
*out_hw_irq = state->ipi_number;
if (out_xd)
*out_xd = &state->ipi_data;
}
}
/*
* This corresponds to an "ICS" in XICS terminology, we use it
* as a mean to break up source information into multiple structures.
*/
struct kvmppc_xive_src_block {
arch_spinlock_t lock;
u16 id;
struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
struct kvmppc_xive {
struct kvm *kvm;
struct kvm_device *dev;
struct dentry *dentry;
/* VP block associated with the VM */
u32 vp_base;
/* Blocks of sources */
struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
u32 max_sbid;
/*
* For state save, we lazily scan the queues on the first interrupt
* being migrated. We don't have a clean way to reset that flags
* so we keep track of the number of valid sources and how many of
* them were migrated so we can reset when all of them have been
* processed.
*/
u32 src_count;
u32 saved_src_count;
/*
* Some irqs are delayed on restore until the source is created,
* keep track here of how many of them
*/
u32 delayed_irqs;
/* Which queues (priorities) are in use by the guest */
u8 qmap;
/* Queue orders */
u32 q_order;
u32 q_page_order;
};
#define KVMPPC_XIVE_Q_COUNT 8
struct kvmppc_xive_vcpu {
struct kvmppc_xive *xive;
struct kvm_vcpu *vcpu;
bool valid;
/* Server number. This is the HW CPU ID from a guest perspective */
u32 server_num;
/*
* HW VP corresponding to this VCPU. This is the base of the VP
* block plus the server number.
*/
u32 vp_id;
u32 vp_chip_id;
u32 vp_cam;
/* IPI used for sending ... IPIs */
u32 vp_ipi;
struct xive_irq_data vp_ipi_data;
/* Local emulation state */
uint8_t cppr; /* guest CPPR */
uint8_t hw_cppr;/* Hardware CPPR */
uint8_t mfrr;
uint8_t pending;
/* Each VP has 8 queues though we only provision some */
struct xive_q queues[KVMPPC_XIVE_Q_COUNT];
u32 esc_virq[KVMPPC_XIVE_Q_COUNT];
char *esc_virq_names[KVMPPC_XIVE_Q_COUNT];
/* Stash a delayed irq on restore from migration (see set_icp) */
u32 delayed_irq;
/* Stats */
u64 stat_rm_h_xirr;
u64 stat_rm_h_ipoll;
u64 stat_rm_h_cppr;
u64 stat_rm_h_eoi;
u64 stat_rm_h_ipi;
u64 stat_vm_h_xirr;
u64 stat_vm_h_ipoll;
u64 stat_vm_h_cppr;
u64 stat_vm_h_eoi;
u64 stat_vm_h_ipi;
};
static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
{
struct kvm_vcpu *vcpu = NULL;
int i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
return vcpu;
}
return NULL;
}
static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
u32 irq, u16 *source)
{
u32 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
u16 src = irq & KVMPPC_XICS_SRC_MASK;
if (source)
*source = src;
if (bid > KVMPPC_XICS_MAX_ICS_ID)
return NULL;
return xive->src_blocks[bid];
}
/*
* Mapping between guest priorities and host priorities
* is as follow.
*
* Guest request for 0...6 are honored. Guest request for anything
* higher results in a priority of 7 being applied.
*
* However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
* in order to match AIX expectations
*
* Similar mapping is done for CPPR values
*/
static inline u8 xive_prio_from_guest(u8 prio)
{
if (prio == 0xff || prio < 8)
return prio;
return 7;
}
static inline u8 xive_prio_to_guest(u8 prio)
{
if (prio == 0xff || prio < 7)
return prio;
return 0xb;
}
static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
{
u32 cur;
if (!qpage)
return 0;
cur = be32_to_cpup(qpage + *idx);
if ((cur >> 31) == *toggle)
return 0;
*idx = (*idx + 1) & msk;
if (*idx == 0)
(*toggle) ^= 1;
return cur & 0x7fffffff;
}
extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */

View File

@ -0,0 +1,503 @@
/*
* Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
/* File to be included by other .c files */
#define XGLUE(a,b) a##b
#define GLUE(a,b) XGLUE(a,b)
static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
{
u8 cppr;
u16 ack;
/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
/* Perform the acknowledge OS to register cycle. */
ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
/* Synchronize subsequent queue accesses */
mb();
/* XXX Check grouping level */
/* Anything ? */
if (!((ack >> 8) & TM_QW1_NSR_EO))
return;
/* Grab CPPR of the most favored pending interrupt */
cppr = ack & 0xff;
if (cppr < 8)
xc->pending |= 1 << cppr;
#ifdef XIVE_RUNTIME_CHECKS
/* Check consistency */
if (cppr >= xc->hw_cppr)
pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
smp_processor_id(), cppr, xc->hw_cppr);
#endif
/*
* Update our image of the HW CPPR. We don't yet modify
* xc->cppr, this will be done as we scan for interrupts
* in the queues.
*/
xc->hw_cppr = cppr;
}
static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
{
u64 val;
if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
offset |= offset << 4;
val =__x_readq(__x_eoi_page(xd) + offset);
#ifdef __LITTLE_ENDIAN__
val >>= 64-8;
#endif
return (u8)val;
}
static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
__x_writeq(0, __x_eoi_page(xd));
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
opal_int_eoi(hw_irq);
} else {
uint64_t eoi_val;
/*
* Otherwise for EOI, we use the special MMIO that does
* a clear of both P and Q and returns the old Q,
* except for LSIs where we use the "EOI cycle" special
* load.
*
* This allows us to then do a re-trigger if Q was set
* rather than synthetizing an interrupt in software
*
* For LSIs, using the HW EOI cycle works around a problem
* on P9 DD1 PHBs where the other ESB accesses don't work
* properly.
*/
if (xd->flags & XIVE_IRQ_FLAG_LSI)
__x_readq(__x_eoi_page(xd));
else {
eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
/* Re-trigger if needed */
if ((eoi_val & 1) && __x_trig_page(xd))
__x_writeq(0, __x_trig_page(xd));
}
}
}
enum {
scan_fetch,
scan_poll,
scan_eoi,
};
static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
u8 pending, int scan_type)
{
u32 hirq = 0;
u8 prio = 0xff;
/* Find highest pending priority */
while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
struct xive_q *q;
u32 idx, toggle;
__be32 *qpage;
/*
* If pending is 0 this will return 0xff which is what
* we want
*/
prio = ffs(pending) - 1;
/*
* If the most favoured prio we found pending is less
* favored (or equal) than a pending IPI, we return
* the IPI instead.
*
* Note: If pending was 0 and mfrr is 0xff, we will
* not spurriously take an IPI because mfrr cannot
* then be smaller than cppr.
*/
if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
prio = xc->mfrr;
hirq = XICS_IPI;
break;
}
/* Don't scan past the guest cppr */
if (prio >= xc->cppr || prio > 7)
break;
/* Grab queue and pointers */
q = &xc->queues[prio];
idx = q->idx;
toggle = q->toggle;
/*
* Snapshot the queue page. The test further down for EOI
* must use the same "copy" that was used by __xive_read_eq
* since qpage can be set concurrently and we don't want
* to miss an EOI.
*/
qpage = READ_ONCE(q->qpage);
skip_ipi:
/*
* Try to fetch from the queue. Will return 0 for a
* non-queueing priority (ie, qpage = 0).
*/
hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
/*
* If this was a signal for an MFFR change done by
* H_IPI we skip it. Additionally, if we were fetching
* we EOI it now, thus re-enabling reception of a new
* such signal.
*
* We also need to do that if prio is 0 and we had no
* page for the queue. In this case, we have non-queued
* IPI that needs to be EOId.
*
* This is safe because if we have another pending MFRR
* change that wasn't observed above, the Q bit will have
* been set and another occurrence of the IPI will trigger.
*/
if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
if (scan_type == scan_fetch)
GLUE(X_PFX,source_eoi)(xc->vp_ipi,
&xc->vp_ipi_data);
/* Loop back on same queue with updated idx/toggle */
#ifdef XIVE_RUNTIME_CHECKS
WARN_ON(hirq && hirq != XICS_IPI);
#endif
if (hirq)
goto skip_ipi;
}
/* If fetching, update queue pointers */
if (scan_type == scan_fetch) {
q->idx = idx;
q->toggle = toggle;
}
/* Something found, stop searching */
if (hirq)
break;
/* Clear the pending bit on the now empty queue */
pending &= ~(1 << prio);
/*
* Check if the queue count needs adjusting due to
* interrupts being moved away.
*/
if (atomic_read(&q->pending_count)) {
int p = atomic_xchg(&q->pending_count, 0);
if (p) {
#ifdef XIVE_RUNTIME_CHECKS
WARN_ON(p > atomic_read(&q->count));
#endif
atomic_sub(p, &q->count);
}
}
}
/* If we are just taking a "peek", do nothing else */
if (scan_type == scan_poll)
return hirq;
/* Update the pending bits */
xc->pending = pending;
/*
* If this is an EOI that's it, no CPPR adjustment done here,
* all we needed was cleanup the stale pending bits and check
* if there's anything left.
*/
if (scan_type == scan_eoi)
return hirq;
/*
* If we found an interrupt, adjust what the guest CPPR should
* be as if we had just fetched that interrupt from HW.
*/
if (hirq)
xc->cppr = prio;
/*
* If it was an IPI the HW CPPR might have been lowered too much
* as the HW interrupt we use for IPIs is routed to priority 0.
*
* We re-sync it here.
*/
if (xc->cppr != xc->hw_cppr) {
xc->hw_cppr = xc->cppr;
__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
}
return hirq;
}
X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
u8 old_cppr;
u32 hirq;
pr_devel("H_XIRR\n");
xc->GLUE(X_STAT_PFX,h_xirr)++;
/* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc);
/*
* Cleanup the old-style bits if needed (they may have been
* set by pull or an escalation interrupts).
*/
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr);
/* Grab previous CPPR and reverse map it */
old_cppr = xive_prio_to_guest(xc->cppr);
/* Scan for actual interrupts */
hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
hirq, xc->hw_cppr, xc->cppr);
#ifdef XIVE_RUNTIME_CHECKS
/* That should never hit */
if (hirq & 0xff000000)
pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
#endif
/*
* XXX We could check if the interrupt is masked here and
* filter it. If we chose to do so, we would need to do:
*
* if (masked) {
* lock();
* if (masked) {
* old_Q = true;
* hirq = 0;
* }
* unlock();
* }
*/
/* Return interrupt and old CPPR in GPR4 */
vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
return H_SUCCESS;
}
X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
u8 pending = xc->pending;
u32 hirq;
u8 pipr;
pr_devel("H_IPOLL(server=%ld)\n", server);
xc->GLUE(X_STAT_PFX,h_ipoll)++;
/* Grab the target VCPU if not the current one */
if (xc->server_num != server) {
vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
if (!vcpu)
return H_PARAMETER;
xc = vcpu->arch.xive_vcpu;
/* Scan all priorities */
pending = 0xff;
} else {
/* Grab pending interrupt if any */
pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
if (pipr < 8)
pending |= 1 << pipr;
}
hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
/* Return interrupt and old CPPR in GPR4 */
vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
return H_SUCCESS;
}
static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
{
u8 pending, prio;
pending = xc->pending;
if (xc->mfrr != 0xff) {
if (xc->mfrr < 8)
pending |= 1 << xc->mfrr;
else
pending |= 0x80;
}
if (!pending)
return;
prio = ffs(pending) - 1;
__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
}
X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
u8 old_cppr;
pr_devel("H_CPPR(cppr=%ld)\n", cppr);
xc->GLUE(X_STAT_PFX,h_cppr)++;
/* Map CPPR */
cppr = xive_prio_from_guest(cppr);
/* Remember old and update SW state */
old_cppr = xc->cppr;
xc->cppr = cppr;
/*
* We are masking less, we need to look for pending things
* to deliver and set VP pending bits accordingly to trigger
* a new interrupt otherwise we might miss MFRR changes for
* which we have optimized out sending an IPI signal.
*/
if (cppr > old_cppr)
GLUE(X_PFX,push_pending_to_hw)(xc);
/* Apply new CPPR */
xc->hw_cppr = cppr;
__x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
return H_SUCCESS;
}
X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct xive_irq_data *xd;
u8 new_cppr = xirr >> 24;
u32 irq = xirr & 0x00ffffff, hw_num;
u16 src;
int rc = 0;
pr_devel("H_EOI(xirr=%08lx)\n", xirr);
xc->GLUE(X_STAT_PFX,h_eoi)++;
xc->cppr = xive_prio_from_guest(new_cppr);
/*
* IPIs are synthetized from MFRR and thus don't need
* any special EOI handling. The underlying interrupt
* used to signal MFRR changes is EOId when fetched from
* the queue.
*/
if (irq == XICS_IPI || irq == 0)
goto bail;
/* Find interrupt source */
sb = kvmppc_xive_find_source(xive, irq, &src);
if (!sb) {
pr_devel(" source not found !\n");
rc = H_PARAMETER;
goto bail;
}
state = &sb->irq_state[src];
kvmppc_xive_select_irq(state, &hw_num, &xd);
state->in_eoi = true;
mb();
again:
if (state->guest_priority == MASKED) {
arch_spin_lock(&sb->lock);
if (state->guest_priority != MASKED) {
arch_spin_unlock(&sb->lock);
goto again;
}
pr_devel(" EOI on saved P...\n");
/* Clear old_p, that will cause unmask to perform an EOI */
state->old_p = false;
arch_spin_unlock(&sb->lock);
} else {
pr_devel(" EOI on source...\n");
/* Perform EOI on the source */
GLUE(X_PFX,source_eoi)(hw_num, xd);
/* If it's an emulated LSI, check level and resend */
if (state->lsi && state->asserted)
__x_writeq(0, __x_trig_page(xd));
}
mb();
state->in_eoi = false;
bail:
/* Re-evaluate pending IRQs and update HW */
GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
GLUE(X_PFX,push_pending_to_hw)(xc);
pr_devel(" after scan pending=%02x\n", xc->pending);
/* Apply new CPPR */
xc->hw_cppr = xc->cppr;
__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
return rc;
}
X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
xc->GLUE(X_STAT_PFX,h_ipi)++;
/* Find target */
vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
if (!vcpu)
return H_PARAMETER;
xc = vcpu->arch.xive_vcpu;
/* Locklessly write over MFRR */
xc->mfrr = mfrr;
/* Shoot the IPI if most favored than target cppr */
if (mfrr < xc->cppr)
__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
return H_SUCCESS;
}

View File

@ -12,6 +12,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
#endif
#ifdef CONFIG_KVM_XICS
ret = ret || (kvm->arch.xics != NULL);
ret = ret || (kvm->arch.xive != NULL);
#endif
smp_rmb();
return ret;

View File

@ -38,6 +38,8 @@
#include <asm/irqflags.h>
#include <asm/iommu.h>
#include <asm/switch_to.h>
#include <asm/xive.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
@ -697,7 +699,10 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
break;
case KVMPPC_IRQ_XICS:
kvmppc_xics_free_icp(vcpu);
if (xive_enabled())
kvmppc_xive_cleanup_vcpu(vcpu);
else
kvmppc_xics_free_icp(vcpu);
break;
}
@ -1522,8 +1527,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = -EPERM;
dev = kvm_device_from_filp(f.file);
if (dev)
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
if (dev) {
if (xive_enabled())
r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
else
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
}
fdput(f);
break;
@ -1547,7 +1556,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
return true;
#endif
#ifdef CONFIG_KVM_XICS
if (kvm->arch.xics)
if (kvm->arch.xics || kvm->arch.xive)
return true;
#endif
return false;

View File

@ -967,3 +967,4 @@ EXPORT_SYMBOL_GPL(opal_leds_set_ind);
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);

View File

@ -46,13 +46,15 @@
#endif
bool __xive_enabled;
EXPORT_SYMBOL_GPL(__xive_enabled);
bool xive_cmdline_disabled;
/* We use only one priority for now */
static u8 xive_irq_priority;
/* TIMA */
/* TIMA exported to KVM */
void __iomem *xive_tima;
EXPORT_SYMBOL_GPL(xive_tima);
u32 xive_tima_offset;
/* Backend ops */
@ -345,8 +347,11 @@ static void xive_irq_eoi(struct irq_data *d)
DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
d->irq, irqd_to_hwirq(d), xc->pending_prio);
/* EOI the source if it hasn't been disabled */
if (!irqd_irq_disabled(d))
/*
* EOI the source if it hasn't been disabled and hasn't
* been passed-through to a KVM guest
*/
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
/*
@ -689,9 +694,14 @@ static int xive_irq_set_affinity(struct irq_data *d,
old_target = xd->target;
rc = xive_ops->configure_irq(hw_irq,
get_hard_smp_processor_id(target),
xive_irq_priority, d->irq);
/*
* Only configure the irq if it's not currently passed-through to
* a KVM guest
*/
if (!irqd_is_forwarded_to_vcpu(d))
rc = xive_ops->configure_irq(hw_irq,
get_hard_smp_processor_id(target),
xive_irq_priority, d->irq);
if (rc < 0) {
pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
return rc;
@ -771,6 +781,123 @@ static int xive_irq_retrigger(struct irq_data *d)
return 1;
}
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
int rc;
u8 pq;
/*
* We only support this on interrupts that do not require
* firmware calls for masking and unmasking
*/
if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
return -EIO;
/*
* This is called by KVM with state non-NULL for enabling
* pass-through or NULL for disabling it
*/
if (state) {
irqd_set_forwarded_to_vcpu(d);
/* Set it to PQ=10 state to prevent further sends */
pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
/*
* An untargetted interrupt should have been
* also masked at the source
*/
WARN_ON(pq & 2);
return 0;
}
/*
* If P was set, adjust state to PQ=11 to indicate
* that a resend is needed for the interrupt to reach
* the guest. Also remember the value of P.
*
* This also tells us that it's in flight to a host queue
* or has already been fetched but hasn't been EOIed yet
* by the host. This it's potentially using up a host
* queue slot. This is important to know because as long
* as this is the case, we must not hard-unmask it when
* "returning" that interrupt to the host.
*
* This saved_p is cleared by the host EOI, when we know
* for sure the queue slot is no longer in use.
*/
if (pq & 2) {
pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
xd->saved_p = true;
/*
* Sync the XIVE source HW to ensure the interrupt
* has gone through the EAS before we change its
* target to the guest. That should guarantee us
* that we *will* eventually get an EOI for it on
* the host. Otherwise there would be a small window
* for P to be seen here but the interrupt going
* to the guest queue.
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
} else
xd->saved_p = false;
} else {
irqd_clr_forwarded_to_vcpu(d);
/* No host target ? hard mask and return */
if (xd->target == XIVE_INVALID_TARGET) {
xive_do_source_set_mask(xd, true);
return 0;
}
/*
* Sync the XIVE source HW to ensure the interrupt
* has gone through the EAS before we change its
* target to the host.
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
/*
* By convention we are called with the interrupt in
* a PQ=10 or PQ=11 state, ie, it won't fire and will
* have latched in Q whether there's a pending HW
* interrupt or not.
*
* First reconfigure the target.
*/
rc = xive_ops->configure_irq(hw_irq,
get_hard_smp_processor_id(xd->target),
xive_irq_priority, d->irq);
if (rc)
return rc;
/*
* Then if saved_p is not set, effectively re-enable the
* interrupt with an EOI. If it is set, we know there is
* still a message in a host queue somewhere that will be
* EOId eventually.
*
* Note: We don't check irqd_irq_disabled(). Effectively,
* we *will* let the irq get through even if masked if the
* HW is still firing it in order to deal with the whole
* saved_p business properly. If the interrupt triggers
* while masked, the generic code will re-mask it anyway.
*/
if (!xd->saved_p)
xive_do_source_eoi(hw_irq, xd);
}
return 0;
}
static struct irq_chip xive_irq_chip = {
.name = "XIVE-IRQ",
.irq_startup = xive_irq_startup,
@ -781,12 +908,14 @@ static struct irq_chip xive_irq_chip = {
.irq_set_affinity = xive_irq_set_affinity,
.irq_set_type = xive_irq_set_type,
.irq_retrigger = xive_irq_retrigger,
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
};
bool is_xive_irq(struct irq_chip *chip)
{
return chip == &xive_irq_chip;
}
EXPORT_SYMBOL_GPL(is_xive_irq);
void xive_cleanup_irq_data(struct xive_irq_data *xd)
{
@ -801,6 +930,7 @@ void xive_cleanup_irq_data(struct xive_irq_data *xd)
xd->trig_mmio = NULL;
}
}
EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
{

View File

@ -31,6 +31,7 @@
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/opal.h>
#include <asm/kvm_ppc.h>
#include "xive-internal.h"
@ -95,6 +96,7 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
}
return 0;
}
EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
{
@ -108,6 +110,8 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
}
return rc == 0 ? 0 : -ENXIO;
}
EXPORT_SYMBOL_GPL(xive_native_configure_irq);
/* This can be called multiple time to change a queue configuration */
int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@ -172,6 +176,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
fail:
return rc;
}
EXPORT_SYMBOL_GPL(xive_native_configure_queue);
static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
{
@ -192,6 +197,7 @@ void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
{
__xive_native_disable_queue(vp_id, q, prio);
}
EXPORT_SYMBOL_GPL(xive_native_disable_queue);
static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
{
@ -262,6 +268,7 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
}
return 0;
}
#endif /* CONFIG_SMP */
u32 xive_native_alloc_irq(void)
{
@ -277,6 +284,7 @@ u32 xive_native_alloc_irq(void)
return 0;
return rc;
}
EXPORT_SYMBOL_GPL(xive_native_alloc_irq);
void xive_native_free_irq(u32 irq)
{
@ -287,7 +295,9 @@ void xive_native_free_irq(u32 irq)
msleep(1);
}
}
EXPORT_SYMBOL_GPL(xive_native_free_irq);
#ifdef CONFIG_SMP
static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
{
s64 rc;
@ -383,7 +393,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
return;
/* Enable the pool VP */
vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
vp = xive_pool_vps + cpu;
pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
for (;;) {
rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
@ -428,7 +438,7 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
/* Disable it */
vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
vp = xive_pool_vps + cpu;
for (;;) {
rc = opal_xive_set_vp_info(vp, 0, 0);
if (rc != OPAL_BUSY)
@ -437,10 +447,11 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
}
}
static void xive_native_sync_source(u32 hw_irq)
void xive_native_sync_source(u32 hw_irq)
{
opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
}
EXPORT_SYMBOL_GPL(xive_native_sync_source);
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
@ -501,10 +512,24 @@ static bool xive_parse_provisioning(struct device_node *np)
return true;
}
static void xive_native_setup_pools(void)
{
/* Allocate a pool big enough */
pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
xive_pool_vps, nr_cpu_ids);
}
u32 xive_native_default_eq_shift(void)
{
return xive_queue_shift;
}
EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
bool xive_native_init(void)
{
@ -514,7 +539,7 @@ bool xive_native_init(void)
struct property *prop;
u8 max_prio = 7;
const __be32 *p;
u32 val;
u32 val, cpu;
s64 rc;
if (xive_cmdline_disabled)
@ -550,7 +575,11 @@ bool xive_native_init(void)
break;
}
/* Grab size of provisioning pages */
/* Configure Thread Management areas for KVM */
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
/* Grab size of provisionning pages */
xive_parse_provisioning(np);
/* Switch the XIVE to exploitation mode */
@ -560,6 +589,9 @@ bool xive_native_init(void)
return false;
}
/* Setup some dummy HV pool VPs */
xive_native_setup_pools();
/* Initialize XIVE core with our backend */
if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
max_prio)) {
@ -638,3 +670,47 @@ void xive_native_free_vp_block(u32 vp_base)
pr_warn("OPAL error %lld freeing VP block\n", rc);
}
EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
int xive_native_enable_vp(u32 vp_id)
{
s64 rc;
for (;;) {
rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
if (rc != OPAL_BUSY)
break;
msleep(1);
}
return rc ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(xive_native_enable_vp);
int xive_native_disable_vp(u32 vp_id)
{
s64 rc;
for (;;) {
rc = opal_xive_set_vp_info(vp_id, 0, 0);
if (rc != OPAL_BUSY)
break;
msleep(1);
}
return rc ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(xive_native_disable_vp);
int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
{
__be64 vp_cam_be;
__be32 vp_chip_id_be;
s64 rc;
rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
if (rc)
return -EIO;
*out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
*out_chip_id = be32_to_cpu(vp_chip_id_be);
return 0;
}
EXPORT_SYMBOL_GPL(xive_native_get_vp_info);

View File

@ -1167,7 +1167,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
extern struct kvm_device_ops kvm_arm_vgic_v3_ops;

View File

@ -2836,10 +2836,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
[KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
[KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
#endif
#ifdef CONFIG_KVM_XICS
[KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
#endif
};
int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)