From a1f158262a3e00fe396f2d21ef1cffdfc29226dc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 20 Apr 2018 15:33:21 +1000 Subject: [PATCH 01/63] KVM: PPC: Book3S HV: Add 'online' register to ONE_REG interface This adds a new KVM_REG_PPC_ONLINE register which userspace can set to 0 or 1 via the GET/SET_ONE_REG interface to indicate whether it considers the VCPU to be offline (0), that is, not currently running, or online (1). This will be used in a later patch to configure the register which controls PURR and SPURR accumulation. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 17498e9a26e4..9703f8f229c9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -772,6 +772,8 @@ struct kvm_vcpu_arch { u64 busy_preempt; u32 emul_inst; + + u32 online; #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 833ed9a16adf..1b32b56a03d3 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -633,6 +633,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) +#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9963f65c212b..04bd71796098 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1526,6 +1526,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.dec_expires + vcpu->arch.vcore->tb_offset); break; + case KVM_REG_PPC_ONLINE: + *val = get_reg_val(id, vcpu->arch.online); + break; default: r = -EINVAL; break; @@ -1757,6 +1760,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.dec_expires = set_reg_val(id, *val) - vcpu->arch.vcore->tb_offset; break; + case KVM_REG_PPC_ONLINE: + vcpu->arch.online = set_reg_val(id, *val); + break; default: r = -EINVAL; break; From 7aa15842c15f8a32000372ad2b3195029fde6fd4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 20 Apr 2018 19:53:22 +1000 Subject: [PATCH 02/63] KVM: PPC: Book3S HV: Set RWMR on POWER8 so PURR/SPURR count correctly Although Linux doesn't use PURR and SPURR ((Scaled) Processor Utilization of Resources Register), other OSes depend on them. On POWER8 they count at a rate depending on whether the VCPU is idle or running, the activity of the VCPU, and the value in the RWMR (Region-Weighting Mode Register). Hardware expects the hypervisor to update the RWMR when a core is dispatched to reflect the number of online VCPUs in the vcore. This adds code to maintain a count in the vcore struct indicating how many VCPUs are online. In kvmppc_run_core we use that count to set the RWMR register on POWER8. If the core is split because of a static or dynamic micro-threading mode, we use the value for 8 threads. The RWMR value is not relevant when the host is executing because Linux does not use the PURR or SPURR register, so we don't bother saving and restoring the host value. 
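As a side note on how the new register is driven from userspace, the following is a minimal sketch (not part of the patch) of how a VMM such as QEMU could flag a VCPU online through the standard GET/SET_ONE_REG interface. It assumes uapi headers that already carry the KVM_REG_PPC_ONLINE definition added in the previous patch; vcpu_fd is a hypothetical, already-open VCPU file descriptor.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Tell KVM whether this VCPU should be counted as online (1) or offline (0). */
	static int set_vcpu_online(int vcpu_fd, uint32_t online)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_ONLINE,   /* KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf */
			.addr = (uintptr_t)&online,   /* userspace buffer holding the 0/1 value */
		};

		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}

A VMM would typically call this with 1 when the guest brings a CPU online and with 0 when the guest offlines it, for example around the PAPR start-cpu and stop-self RTAS calls.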
For the sake of old userspace which does not set the KVM_REG_PPC_ONLINE register, we set online to 1 if it was 0 at the time of a KVM_RUN ioctl. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_hv.c | 61 ++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index e7377b73cfec..c1f3a870c48a 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -104,6 +104,7 @@ struct kvmppc_vcore { ulong vtb; /* virtual timebase */ ulong conferring_threads; unsigned int halt_poll_ns; + atomic_t online_count; }; struct kvmppc_vcpu_book3s { diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index cb0f272ce123..44b2be4a65d1 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -365,6 +365,7 @@ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ #define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ #define SPRN_PMCR 0x374 /* Power Management Control Register */ +#define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ /* HFSCR and FSCR bit numbers are the same */ #define FSCR_SCV_LG 12 /* Enable System Call Vectored */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 04bd71796098..f61dd9efa6fb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -123,6 +123,32 @@ static bool no_mixing_hpt_and_radix; static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +/* + * RWMR values for POWER8. These control the rate at which PURR + * and SPURR count and should be set according to the number of + * online threads in the vcore being run. + */ +#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9 +#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9 +#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA + +static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = { + RWMR_RPA_P8_1THREAD, + RWMR_RPA_P8_1THREAD, + RWMR_RPA_P8_2THREAD, + RWMR_RPA_P8_3THREAD, + RWMR_RPA_P8_4THREAD, + RWMR_RPA_P8_5THREAD, + RWMR_RPA_P8_6THREAD, + RWMR_RPA_P8_7THREAD, + RWMR_RPA_P8_8THREAD, +}; + static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip) { @@ -1761,7 +1787,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.vcore->tb_offset; break; case KVM_REG_PPC_ONLINE: - vcpu->arch.online = set_reg_val(id, *val); + i = set_reg_val(id, *val); + if (i && !vcpu->arch.online) + atomic_inc(&vcpu->arch.vcore->online_count); + else if (!i && vcpu->arch.online) + atomic_dec(&vcpu->arch.vcore->online_count); + vcpu->arch.online = i; break; default: r = -EINVAL; @@ -2856,6 +2887,25 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } } + /* + * On POWER8, set RWMR register. + * Since it only affects PURR and SPURR, it doesn't affect + * the host, so we don't save/restore the host value. 
+ */ + if (is_power8) { + unsigned long rwmr_val = RWMR_RPA_P8_8THREAD; + int n_online = atomic_read(&vc->online_count); + + /* + * Use the 8-thread value if we're doing split-core + * or if the vcore's online count looks bogus. + */ + if (split == 1 && threads_per_subcore == MAX_SMT_THREADS && + n_online >= 1 && n_online <= MAX_SMT_THREADS) + rwmr_val = p8_rwmr_values[n_online]; + mtspr(SPRN_RWMR, rwmr_val); + } + /* Start all the threads */ active = 0; for (sub = 0; sub < core_info.n_subcores; ++sub) { @@ -3358,6 +3408,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } #endif + /* + * Force online to 1 for the sake of old userspace which doesn't + * set it. + */ + if (!vcpu->arch.online) { + atomic_inc(&vcpu->arch.vcore->online_count); + vcpu->arch.online = 1; + } + kvmppc_core_prepare_to_enter(vcpu); /* No need to go into the guest when all we'll do is come back out */ From 48e70b1ce667dc032f9166cc00ddb594ecc0065e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 19 Apr 2018 11:49:51 +1000 Subject: [PATCH 03/63] KVM: PPC: Book3S HV: Fix inaccurate comment We now have interrupts hard-disabled when coming back from kvmppc_hv_entry_trampoline, so this changes the comment to reflect that. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_interrupts.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0e8493033288..82f2ff9410b6 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -137,7 +137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* * We return here in virtual mode after the guest exits * with something that we can't handle in real mode. - * Interrupts are enabled again at this point. + * Interrupts are still hard-disabled. */ /* From c6b61661d229e42b58d5e511191e925d105a5cce Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 14 May 2018 20:00:27 +1000 Subject: [PATCH 04/63] KVM: PPC: Book3S: Use correct page shift in H_STUFF_TCE The other TCE handlers use page shift from the guest visible TCE table (described by kvmppc_spapr_tce_iommu_table) so let's make H_STUFF_TCE handlers do the same thing. This should cause no behavioral change now but soon we will allow the iommu_table::it_page_shift being different from from the emulated table page size so this will play a role. 
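To make the page-shift distinction concrete, here is a small illustrative sketch (not from the patch; the numbers are made up): a guest-visible TCE table with 16MB pages (stt->page_shift == 24) backed by 64KB host IOMMU pages (stit->tbl->it_page_shift == 16). Indexing the guest table by the host shift, as the old H_STUFF_TCE code did, gives an index 256 times too large once the two shifts are allowed to differ; a later patch in this series instead fans each guest entry out to 1 << (guest_shift - host_shift) host entries.

	#include <assert.h>

	int main(void)
	{
		unsigned long ioba = 0x2000000;	/* second 16MB entry of the guest table */
		unsigned int guest_shift = 24;	/* stt->page_shift */
		unsigned int host_shift = 16;	/* stit->tbl->it_page_shift */

		assert((ioba >> guest_shift) == 2);	/* entry index the hypercall means */
		assert((ioba >> host_shift) == 512);	/* what indexing by the host shift yields */
		assert((1ul << (guest_shift - host_shift)) == 256);	/* host pages per guest entry */
		return 0;
	}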
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Balbir Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 4dffa611376d..041e54d26750 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -615,7 +615,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - unsigned long entry = ioba >> stit->tbl->it_page_shift; + unsigned long entry = ioba >> stt->page_shift; for (i = 0; i < npages; ++i) { ret = kvmppc_tce_iommu_unmap(vcpu->kvm, diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 6651f736a0b1..e220fabb2f5d 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -526,7 +526,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - unsigned long entry = ioba >> stit->tbl->it_page_shift; + unsigned long entry = ioba >> stt->page_shift; for (i = 0; i < npages; ++i) { ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, From ca1fc489cfa06a554fd71eb46d8927614ec7e6f3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 14 May 2018 20:00:28 +1000 Subject: [PATCH 05/63] KVM: PPC: Book3S: Allow backing bigger guest IOMMU pages with smaller physical pages At the moment we only support in the host the IOMMU page sizes which the guest is aware of, which is 4KB/64KB/16MB. However P9 does not support 16MB IOMMU pages, 2MB and 1GB pages are supported instead. We can still emulate bigger guest pages (for example 16MB) with smaller host pages (4KB/64KB/2MB). This allows the physical IOMMU pages to use a page size smaller or equal than the guest visible IOMMU page size. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 64 ++++++++++++++++++++++------- arch/powerpc/kvm/book3s_64_vio_hv.c | 50 +++++++++++++++++++--- 2 files changed, 94 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 041e54d26750..984f1978a19c 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -176,14 +176,12 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!tbltmp) continue; - /* - * Make sure hardware table parameters are exactly the same; - * this is used in the TCE handlers where boundary checks - * use only the first attached table. - */ - if ((tbltmp->it_page_shift == stt->page_shift) && - (tbltmp->it_offset == stt->offset) && - (tbltmp->it_size == stt->size)) { + /* Make sure hardware table parameters are compatible */ + if ((tbltmp->it_page_shift <= stt->page_shift) && + (tbltmp->it_offset << tbltmp->it_page_shift == + stt->offset << stt->page_shift) && + (tbltmp->it_size << tbltmp->it_page_shift == + stt->size << stt->page_shift)) { /* * Reference the table to avoid races with * add/remove DMA windows. 
@@ -396,7 +394,7 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, return H_SUCCESS; } -static long kvmppc_tce_iommu_unmap(struct kvm *kvm, +static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry) { enum dma_data_direction dir = DMA_NONE; @@ -416,7 +414,24 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, return ret; } -long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, +static long kvmppc_tce_iommu_unmap(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry) +{ + unsigned long i, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0; i < subpages; ++i) { + ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + +long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry, unsigned long ua, enum dma_data_direction dir) { @@ -453,6 +468,27 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, return 0; } +static long kvmppc_tce_iommu_map(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + unsigned long i, pgoff, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0, pgoff = 0; i < subpages; + ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) { + + ret = kvmppc_tce_iommu_do_map(kvm, tbl, + io_entry + i, ua + pgoff, dir); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { @@ -491,10 +527,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { if (dir == DMA_NONE) - ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry); else - ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, + ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); if (ret == H_SUCCESS) @@ -570,7 +606,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - ret = kvmppc_tce_iommu_map(vcpu->kvm, + ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry + i, ua, iommu_tce_direction(tce)); @@ -618,7 +654,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, unsigned long entry = ioba >> stt->page_shift; for (i = 0; i < npages; ++i) { - ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry + i); if (ret == H_SUCCESS) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index e220fabb2f5d..635f3ca8129a 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -221,7 +221,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, return H_SUCCESS; } -static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, +static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry) { enum dma_data_direction dir = DMA_NONE; @@ -245,7 +245,24 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, return ret; } -static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, +static long kvmppc_rm_tce_iommu_unmap(struct 
kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry) +{ + unsigned long i, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0; i < subpages; ++i) { + ret = kvmppc_rm_tce_iommu_do_unmap(kvm, tbl, io_entry + i); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + +static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry, unsigned long ua, enum dma_data_direction dir) { @@ -290,6 +307,27 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, return 0; } +static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + unsigned long i, pgoff, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0, pgoff = 0; i < subpages; + ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) { + + ret = kvmppc_rm_tce_iommu_do_map(kvm, tbl, + io_entry + i, ua + pgoff, dir); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { @@ -327,10 +365,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { if (dir == DMA_NONE) - ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry); else - ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); if (ret == H_SUCCESS) @@ -477,7 +515,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry + i, ua, iommu_tce_direction(tce)); @@ -529,7 +567,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, unsigned long entry = ioba >> stt->page_shift; for (i = 0; i < npages; ++i) { - ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry + i); if (ret == H_SUCCESS) From e45719af1caff16dbc0f6bf7bbfbc5e7a54738a5 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 14 May 2018 20:00:29 +1000 Subject: [PATCH 06/63] KVM: PPC: Book3S: Check KVM_CREATE_SPAPR_TCE_64 parameters Although it does not seem possible to break the host by passing bad parameters when creating a TCE table in KVM, it is still better to get an early clear indication of that than debugging weird effect this might bring. This adds some sanity checks that the page size is 4KB..16GB as this is what the actual LoPAPR supports and that the window actually fits 64bit space. 
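As an illustration of the bounds being enforced, here is a sketch (not part of the patch) of the same validation done on the userspace side before issuing KVM_CREATE_SPAPR_TCE_64. struct kvm_create_spapr_tce_64 is the existing uapi structure; the limits mirror the check the patch adds: page_shift between 12 (4KB) and 34 (16GB), and the window described by offset and size must still fit once scaled by the page size.

	#include <stdbool.h>
	#include <linux/kvm.h>

	static bool spapr_tce_64_args_sane(const struct kvm_create_spapr_tce_64 *args)
	{
		if (!args->size || args->page_shift < 12 || args->page_shift > 34)
			return false;
		/* offset and size are counted in TCE entries; the window they
		 * describe must not wrap a 64-bit bus address space. */
		return args->offset + args->size <= (~0ULL >> args->page_shift);
	}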
Signed-off-by: Alexey Kardashevskiy Acked-by: Balbir Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 984f1978a19c..80ead383d8ee 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -300,7 +300,8 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, int ret = -ENOMEM; int i; - if (!args->size) + if (!args->size || args->page_shift < 12 || args->page_shift > 34 || + (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) return -EINVAL; size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); From 16d5c39d54031afe8a5663b4638030d9fc38ba19 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 10 May 2018 23:57:19 +0530 Subject: [PATCH 07/63] KVM: PPC: Book3S: Change return type to vm_fault_t Use new return type vm_fault_t for fault handler in struct vm_operations_struct. For now, this is just documenting that the function returns a VM_FAULT value rather than an errno. Once all instances are converted, vm_fault_t will become a distinct type. commit 1c8f422059ae ("mm: change return type to vm_fault_t") Signed-off-by: Souptick Joarder Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 80ead383d8ee..d066e37551ec 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -235,7 +235,7 @@ static void release_spapr_tce_table(struct rcu_head *head) kfree(stt); } -static int kvm_spapr_tce_fault(struct vm_fault *vmf) +static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) { struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; struct page *page; From 1143a70665c2175a33a40d8f2dc277978fbf7640 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 7 May 2018 14:20:07 +0800 Subject: [PATCH 08/63] KVM: PPC: Add pt_regs into kvm_vcpu_arch and move vcpu->arch.gpr[] into it Current regs are scattered at kvm_vcpu_arch structure and it will be more neat to organize them into pt_regs structure. Also it will enable reimplementation of MMIO emulation code with analyse_instr() later. 
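For reference, the structure the VCPU state is being folded into is the regular powerpc pt_regs. The abridged sketch below (a field subset only; see arch/powerpc/include/asm/ptrace.h for the authoritative definition) shows why it is a natural home: it already carries the GPR array moved here as well as the nip/ctr/link/xer fields that a later patch in this series moves too.

	struct pt_regs {			/* abridged, illustration only */
		unsigned long gpr[32];		/* general purpose registers */
		unsigned long nip;		/* next instruction pointer (PC) */
		unsigned long msr;
		unsigned long orig_gpr3;	/* syscall restart support */
		unsigned long ctr;
		unsigned long link;		/* LR */
		unsigned long xer;
		unsigned long ccr;
		/* ... trap, dar, dsisr, result, ... */
	};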
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 4 +- arch/powerpc/include/asm/kvm_book3s_64.h | 8 ++-- arch/powerpc/include/asm/kvm_booke.h | 4 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_builtin.c | 6 +-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 15 ++++--- arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 56 ++++++++++++------------ arch/powerpc/kvm/book3s_xive_template.c | 4 +- arch/powerpc/kvm/e500_emulate.c | 4 +- 12 files changed, 55 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index c1f3a870c48a..e3182f7ae499 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -275,12 +275,12 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { - vcpu->arch.gpr[num] = val; + vcpu->arch.regs.gpr[num] = val; } static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) { - return vcpu->arch.gpr[num]; + return vcpu->arch.regs.gpr[num]; } static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c424e44f4c00..38dbcad086d6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -490,8 +490,8 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) vcpu->arch.ppr = vcpu->arch.ppr_tm; vcpu->arch.dscr = vcpu->arch.dscr_tm; vcpu->arch.tar = vcpu->arch.tar_tm; - memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm, - sizeof(vcpu->arch.gpr)); + memcpy(vcpu->arch.regs.gpr, vcpu->arch.gpr_tm, + sizeof(vcpu->arch.regs.gpr)); vcpu->arch.fp = vcpu->arch.fp_tm; vcpu->arch.vr = vcpu->arch.vr_tm; vcpu->arch.vrsave = vcpu->arch.vrsave_tm; @@ -507,8 +507,8 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) vcpu->arch.ppr_tm = vcpu->arch.ppr; vcpu->arch.dscr_tm = vcpu->arch.dscr; vcpu->arch.tar_tm = vcpu->arch.tar; - memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr, - sizeof(vcpu->arch.gpr)); + memcpy(vcpu->arch.gpr_tm, vcpu->arch.regs.gpr, + sizeof(vcpu->arch.regs.gpr)); vcpu->arch.fp_tm = vcpu->arch.fp; vcpu->arch.vr_tm = vcpu->arch.vr; vcpu->arch.vrsave_tm = vcpu->arch.vrsave; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index bc6e29e4dfd4..f5fc9569ef56 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -36,12 +36,12 @@ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { - vcpu->arch.gpr[num] = val; + vcpu->arch.regs.gpr[num] = val; } static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) { - return vcpu->arch.gpr[num]; + return vcpu->arch.regs.gpr[num]; } static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9703f8f229c9..a75443a372bb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -486,7 +486,7 @@ struct kvm_vcpu_arch { struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; #endif - ulong gpr[32]; + struct pt_regs regs; struct thread_fp_state fp; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 373dc1d6ef44..774c6a8ebfb4 100644 --- 
a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -425,7 +425,7 @@ int main(void) OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid); - OFFSET(VCPU_GPRS, kvm_vcpu, arch.gpr); + OFFSET(VCPU_GPRS, kvm_vcpu, arch.regs.gpr); OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave); OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr); #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 635f3ca8129a..925fc316a104 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -609,7 +609,7 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, page = stt->pages[idx / TCES_PER_PAGE]; tbl = (u64 *)page_address(page); - vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; + vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index de18299f92b7..2b127586be30 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -211,9 +211,9 @@ long kvmppc_h_random(struct kvm_vcpu *vcpu) /* Only need to do the expensive mfmsr() on radix */ if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR)) - r = powernv_get_random_long(&vcpu->arch.gpr[4]); + r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]); else - r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]); + r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]); if (r) return H_SUCCESS; @@ -562,7 +562,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - vcpu->arch.gpr[5] = get_tb(); + vcpu->arch.regs.gpr[5] = get_tb(); if (xive_enabled()) { if (is_rm()) return xive_rm_h_xirr(vcpu); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 78e6a392330f..8e12c5c3c4ee 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -418,7 +418,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel) { return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, - vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]); + vcpu->arch.pgdir, true, + &vcpu->arch.regs.gpr[4]); } #ifdef __BIG_ENDIAN__ @@ -561,13 +562,13 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index, unsigned long avpn) { return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, - &vcpu->arch.gpr[4]); + &vcpu->arch.regs.gpr[4]); } long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - unsigned long *args = &vcpu->arch.gpr[4]; + unsigned long *args = &vcpu->arch.regs.gpr[4]; __be64 *hp, *hptes[4]; unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; @@ -787,8 +788,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); r &= ~HPTE_GR_RESERVED; } - vcpu->arch.gpr[4 + i * 2] = v; - vcpu->arch.gpr[5 + i * 2] = r; + vcpu->arch.regs.gpr[4 + i * 2] = v; + vcpu->arch.regs.gpr[5 + i * 2] = r; } return H_SUCCESS; } @@ -834,7 +835,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, } } } - vcpu->arch.gpr[4] = gr; + vcpu->arch.regs.gpr[4] = gr; ret = H_SUCCESS; out: unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); @@ -881,7 +882,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, kvmppc_set_dirty_from_hpte(kvm, v, 
gr); } } - vcpu->arch.gpr[4] = gr; + vcpu->arch.regs.gpr[4] = gr; ret = H_SUCCESS; out: unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 2a862618f072..758d1d23215e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -517,7 +517,7 @@ unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu) } while (!icp_rm_try_update(icp, old_state, new_state)); /* Return the result in GPR4 */ - vcpu->arch.gpr[4] = xirr; + vcpu->arch.regs.gpr[4] = xirr; return check_too_hard(xics, icp); } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d3f304d06adf..899bc9a02ab5 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -147,20 +147,20 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->gpr[0] = vcpu->arch.gpr[0]; - svcpu->gpr[1] = vcpu->arch.gpr[1]; - svcpu->gpr[2] = vcpu->arch.gpr[2]; - svcpu->gpr[3] = vcpu->arch.gpr[3]; - svcpu->gpr[4] = vcpu->arch.gpr[4]; - svcpu->gpr[5] = vcpu->arch.gpr[5]; - svcpu->gpr[6] = vcpu->arch.gpr[6]; - svcpu->gpr[7] = vcpu->arch.gpr[7]; - svcpu->gpr[8] = vcpu->arch.gpr[8]; - svcpu->gpr[9] = vcpu->arch.gpr[9]; - svcpu->gpr[10] = vcpu->arch.gpr[10]; - svcpu->gpr[11] = vcpu->arch.gpr[11]; - svcpu->gpr[12] = vcpu->arch.gpr[12]; - svcpu->gpr[13] = vcpu->arch.gpr[13]; + svcpu->gpr[0] = vcpu->arch.regs.gpr[0]; + svcpu->gpr[1] = vcpu->arch.regs.gpr[1]; + svcpu->gpr[2] = vcpu->arch.regs.gpr[2]; + svcpu->gpr[3] = vcpu->arch.regs.gpr[3]; + svcpu->gpr[4] = vcpu->arch.regs.gpr[4]; + svcpu->gpr[5] = vcpu->arch.regs.gpr[5]; + svcpu->gpr[6] = vcpu->arch.regs.gpr[6]; + svcpu->gpr[7] = vcpu->arch.regs.gpr[7]; + svcpu->gpr[8] = vcpu->arch.regs.gpr[8]; + svcpu->gpr[9] = vcpu->arch.regs.gpr[9]; + svcpu->gpr[10] = vcpu->arch.regs.gpr[10]; + svcpu->gpr[11] = vcpu->arch.regs.gpr[11]; + svcpu->gpr[12] = vcpu->arch.regs.gpr[12]; + svcpu->gpr[13] = vcpu->arch.regs.gpr[13]; svcpu->cr = vcpu->arch.cr; svcpu->xer = vcpu->arch.xer; svcpu->ctr = vcpu->arch.ctr; @@ -194,20 +194,20 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) if (!svcpu->in_use) goto out; - vcpu->arch.gpr[0] = svcpu->gpr[0]; - vcpu->arch.gpr[1] = svcpu->gpr[1]; - vcpu->arch.gpr[2] = svcpu->gpr[2]; - vcpu->arch.gpr[3] = svcpu->gpr[3]; - vcpu->arch.gpr[4] = svcpu->gpr[4]; - vcpu->arch.gpr[5] = svcpu->gpr[5]; - vcpu->arch.gpr[6] = svcpu->gpr[6]; - vcpu->arch.gpr[7] = svcpu->gpr[7]; - vcpu->arch.gpr[8] = svcpu->gpr[8]; - vcpu->arch.gpr[9] = svcpu->gpr[9]; - vcpu->arch.gpr[10] = svcpu->gpr[10]; - vcpu->arch.gpr[11] = svcpu->gpr[11]; - vcpu->arch.gpr[12] = svcpu->gpr[12]; - vcpu->arch.gpr[13] = svcpu->gpr[13]; + vcpu->arch.regs.gpr[0] = svcpu->gpr[0]; + vcpu->arch.regs.gpr[1] = svcpu->gpr[1]; + vcpu->arch.regs.gpr[2] = svcpu->gpr[2]; + vcpu->arch.regs.gpr[3] = svcpu->gpr[3]; + vcpu->arch.regs.gpr[4] = svcpu->gpr[4]; + vcpu->arch.regs.gpr[5] = svcpu->gpr[5]; + vcpu->arch.regs.gpr[6] = svcpu->gpr[6]; + vcpu->arch.regs.gpr[7] = svcpu->gpr[7]; + vcpu->arch.regs.gpr[8] = svcpu->gpr[8]; + vcpu->arch.regs.gpr[9] = svcpu->gpr[9]; + vcpu->arch.regs.gpr[10] = svcpu->gpr[10]; + vcpu->arch.regs.gpr[11] = svcpu->gpr[11]; + vcpu->arch.regs.gpr[12] = svcpu->gpr[12]; + vcpu->arch.regs.gpr[13] = svcpu->gpr[13]; vcpu->arch.cr = svcpu->cr; vcpu->arch.xer = svcpu->xer; vcpu->arch.ctr = svcpu->ctr; diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 
99c3620b40d9..6e41ba7ec8f4 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -334,7 +334,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu) */ /* Return interrupt and old CPPR in GPR4 */ - vcpu->arch.gpr[4] = hirq | (old_cppr << 24); + vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24); return H_SUCCESS; } @@ -369,7 +369,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll); /* Return interrupt and old CPPR in GPR4 */ - vcpu->arch.gpr[4] = hirq | (xc->cppr << 24); + vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 990db69a1d0b..8f871fb75228 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -53,7 +53,7 @@ static int dbell2prio(ulong param) static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb) { - ulong param = vcpu->arch.gpr[rb]; + ulong param = vcpu->arch.regs.gpr[rb]; int prio = dbell2prio(param); if (prio < 0) @@ -65,7 +65,7 @@ static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb) static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) { - ulong param = vcpu->arch.gpr[rb]; + ulong param = vcpu->arch.regs.gpr[rb]; int prio = dbell2prio(rb); int pir = param & PPC_DBELL_PIR_MASK; int i; From 173c520a049f57e2af498a3f0557d07797ce1c1b Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 7 May 2018 14:20:08 +0800 Subject: [PATCH 09/63] KVM: PPC: Move nip/ctr/lr/xer registers to pt_regs in kvm_vcpu_arch This patch moves nip/ctr/lr/xer registers from scattered places in kvm_vcpu_arch to pt_regs structure. cr register is "unsigned long" in pt_regs and u32 in vcpu->arch. It will need more consideration and may move in later patches. 
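Because common code already goes through the kvmppc_get_*/kvmppc_set_* accessors, call sites are insulated from where the value actually lives. A minimal illustrative helper (not from the patch) that advances the guest PC past an emulated instruction looks identical before and after the move; only the accessor bodies in kvm_book3s.h and kvm_booke.h change from vcpu->arch.pc to vcpu->arch.regs.nip.

	static inline void kvmppc_skip_insn(struct kvm_vcpu *vcpu)
	{
		/* PowerPC instructions are 4 bytes long. */
		kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
	}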
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 16 ++++----- arch/powerpc/include/asm/kvm_book3s_64.h | 12 +++---- arch/powerpc/include/asm/kvm_booke.h | 16 ++++----- arch/powerpc/include/asm/kvm_host.h | 4 --- arch/powerpc/kernel/asm-offsets.c | 16 ++++----- arch/powerpc/kvm/book3s_32_mmu.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 6 ++-- arch/powerpc/kvm/book3s_hv_tm.c | 10 +++--- arch/powerpc/kvm/book3s_hv_tm_builtin.c | 10 +++--- arch/powerpc/kvm/book3s_pr.c | 16 ++++----- arch/powerpc/kvm/booke.c | 41 +++++++++++++----------- arch/powerpc/kvm/booke_emulate.c | 6 ++-- arch/powerpc/kvm/e500_emulate.c | 2 +- arch/powerpc/kvm/e500_mmu.c | 2 +- 14 files changed, 79 insertions(+), 80 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index e3182f7ae499..20d3d5a87296 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -295,42 +295,42 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.xer = val; + vcpu->arch.regs.xer = val; } static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { - return vcpu->arch.xer; + return vcpu->arch.regs.xer; } static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.ctr = val; + vcpu->arch.regs.ctr = val; } static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) { - return vcpu->arch.ctr; + return vcpu->arch.regs.ctr; } static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.lr = val; + vcpu->arch.regs.link = val; } static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) { - return vcpu->arch.lr; + return vcpu->arch.regs.link; } static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.pc = val; + vcpu->arch.regs.nip = val; } static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) { - return vcpu->arch.pc; + return vcpu->arch.regs.nip; } static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38dbcad086d6..dc435a5af7d6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -483,9 +483,9 @@ static inline u64 sanitize_msr(u64 msr) static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) { vcpu->arch.cr = vcpu->arch.cr_tm; - vcpu->arch.xer = vcpu->arch.xer_tm; - vcpu->arch.lr = vcpu->arch.lr_tm; - vcpu->arch.ctr = vcpu->arch.ctr_tm; + vcpu->arch.regs.xer = vcpu->arch.xer_tm; + vcpu->arch.regs.link = vcpu->arch.lr_tm; + vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; vcpu->arch.amr = vcpu->arch.amr_tm; vcpu->arch.ppr = vcpu->arch.ppr_tm; vcpu->arch.dscr = vcpu->arch.dscr_tm; @@ -500,9 +500,9 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) { vcpu->arch.cr_tm = vcpu->arch.cr; - vcpu->arch.xer_tm = vcpu->arch.xer; - vcpu->arch.lr_tm = vcpu->arch.lr; - vcpu->arch.ctr_tm = vcpu->arch.ctr; + vcpu->arch.xer_tm = vcpu->arch.regs.xer; + vcpu->arch.lr_tm = vcpu->arch.regs.link; + vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; vcpu->arch.amr_tm = vcpu->arch.amr; vcpu->arch.ppr_tm = vcpu->arch.ppr; vcpu->arch.dscr_tm = vcpu->arch.dscr; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index f5fc9569ef56..d513e3ed1c65 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h 
@@ -56,12 +56,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.xer = val; + vcpu->arch.regs.xer = val; } static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { - return vcpu->arch.xer; + return vcpu->arch.regs.xer; } static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) @@ -72,32 +72,32 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.ctr = val; + vcpu->arch.regs.ctr = val; } static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) { - return vcpu->arch.ctr; + return vcpu->arch.regs.ctr; } static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.lr = val; + vcpu->arch.regs.link = val; } static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) { - return vcpu->arch.lr; + return vcpu->arch.regs.link; } static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.pc = val; + vcpu->arch.regs.nip = val; } static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) { - return vcpu->arch.pc; + return vcpu->arch.regs.nip; } static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a75443a372bb..8b0ee5e09ea3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -521,14 +521,10 @@ struct kvm_vcpu_arch { u32 qpr[32]; #endif - ulong pc; - ulong ctr; - ulong lr; #ifdef CONFIG_PPC_BOOK3S ulong tar; #endif - ulong xer; u32 cr; #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 774c6a8ebfb4..70a345c73281 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -431,14 +431,14 @@ int main(void) #ifdef CONFIG_ALTIVEC OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr); #endif - OFFSET(VCPU_XER, kvm_vcpu, arch.xer); - OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); - OFFSET(VCPU_LR, kvm_vcpu, arch.lr); + OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); + OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); + OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); #ifdef CONFIG_PPC_BOOK3S OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); #endif OFFSET(VCPU_CR, kvm_vcpu, arch.cr); - OFFSET(VCPU_PC, kvm_vcpu, arch.pc); + OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0); @@ -695,10 +695,10 @@ int main(void) #else /* CONFIG_PPC_BOOK3S */ OFFSET(VCPU_CR, kvm_vcpu, arch.cr); - OFFSET(VCPU_XER, kvm_vcpu, arch.xer); - OFFSET(VCPU_LR, kvm_vcpu, arch.lr); - OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); - OFFSET(VCPU_PC, kvm_vcpu, arch.pc); + OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); + OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); + OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); + OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9); OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 1992676c7a94..45c8ea4a0487 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -52,7 +52,7 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) { #ifdef DEBUG_MMU_PTE_IP - return vcpu->arch.pc == DEBUG_MMU_PTE_IP; + return vcpu->arch.regs.nip == DEBUG_MMU_PTE_IP; #else return true; #endif diff --git 
a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f61dd9efa6fb..336e3468e700 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -397,13 +397,13 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id); pr_err("pc = %.16lx msr = %.16llx trap = %x\n", - vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap); + vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap); for (r = 0; r < 16; ++r) pr_err("r%2d = %.16lx r%d = %.16lx\n", r, kvmppc_get_gpr(vcpu, r), r+16, kvmppc_get_gpr(vcpu, r+16)); pr_err("ctr = %.16lx lr = %.16lx\n", - vcpu->arch.ctr, vcpu->arch.lr); + vcpu->arch.regs.ctr, vcpu->arch.regs.link); pr_err("srr0 = %.16llx srr1 = %.16llx\n", vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1); pr_err("sprg0 = %.16llx sprg1 = %.16llx\n", @@ -411,7 +411,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) pr_err("sprg2 = %.16llx sprg3 = %.16llx\n", vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3); pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n", - vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr); + vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr); pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar); pr_err("fault dar = %.16lx dsisr = %.8x\n", vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index bf710ad3a6d7..008285058f9b 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -19,7 +19,7 @@ static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause) u64 texasr, tfiar; u64 msr = vcpu->arch.shregs.msr; - tfiar = vcpu->arch.pc & ~0x3ull; + tfiar = vcpu->arch.regs.nip & ~0x3ull; texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT; if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) texasr |= TEXASR_SUSP; @@ -57,8 +57,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) (newmsr & MSR_TM))); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = vcpu->arch.shregs.srr0; + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return RESUME_GUEST; case PPC_INST_RFEBB: @@ -90,8 +90,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.bescr = bescr; msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = vcpu->arch.ebbrr; + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = vcpu->arch.ebbrr; return RESUME_GUEST; case PPC_INST_MTMSRD: diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c index d98ccfd2b88c..b2c7c6fca4f9 100644 --- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -35,8 +35,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) return 0; newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = vcpu->arch.shregs.srr0; + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return 1; case PPC_INST_RFEBB: @@ -58,8 +58,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) mtspr(SPRN_BESCR, bescr); msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = mfspr(SPRN_EBBRR); + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = mfspr(SPRN_EBBRR); return 1; case PPC_INST_MTMSRD: @@ 
-103,7 +103,7 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) { vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ - vcpu->arch.pc = vcpu->arch.tfhar; + vcpu->arch.regs.nip = vcpu->arch.tfhar; copy_from_checkpoint(vcpu); vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 899bc9a02ab5..67061d399cd9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -162,10 +162,10 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) svcpu->gpr[12] = vcpu->arch.regs.gpr[12]; svcpu->gpr[13] = vcpu->arch.regs.gpr[13]; svcpu->cr = vcpu->arch.cr; - svcpu->xer = vcpu->arch.xer; - svcpu->ctr = vcpu->arch.ctr; - svcpu->lr = vcpu->arch.lr; - svcpu->pc = vcpu->arch.pc; + svcpu->xer = vcpu->arch.regs.xer; + svcpu->ctr = vcpu->arch.regs.ctr; + svcpu->lr = vcpu->arch.regs.link; + svcpu->pc = vcpu->arch.regs.nip; #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif @@ -209,10 +209,10 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) vcpu->arch.regs.gpr[12] = svcpu->gpr[12]; vcpu->arch.regs.gpr[13] = svcpu->gpr[13]; vcpu->arch.cr = svcpu->cr; - vcpu->arch.xer = svcpu->xer; - vcpu->arch.ctr = svcpu->ctr; - vcpu->arch.lr = svcpu->lr; - vcpu->arch.pc = svcpu->pc; + vcpu->arch.regs.xer = svcpu->xer; + vcpu->arch.regs.ctr = svcpu->ctr; + vcpu->arch.regs.link = svcpu->lr; + vcpu->arch.regs.nip = svcpu->pc; vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; vcpu->arch.fault_dar = svcpu->fault_dar; vcpu->arch.fault_dsisr = svcpu->fault_dsisr; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 876d4f294fdd..a9ca016da670 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -77,8 +77,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); - printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("pc: %08lx msr: %08llx\n", vcpu->arch.regs.nip, + vcpu->arch.shared->msr); + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.regs.link, + vcpu->arch.regs.ctr); printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0, vcpu->arch.shared->srr1); @@ -491,24 +493,25 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (allowed) { switch (int_class) { case INT_CLASS_NONCRIT: - set_guest_srr(vcpu, vcpu->arch.pc, + set_guest_srr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; case INT_CLASS_CRIT: - set_guest_csrr(vcpu, vcpu->arch.pc, + set_guest_csrr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; case INT_CLASS_DBG: - set_guest_dsrr(vcpu, vcpu->arch.pc, + set_guest_dsrr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; case INT_CLASS_MC: - set_guest_mcsrr(vcpu, vcpu->arch.pc, + set_guest_mcsrr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; } - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; + vcpu->arch.regs.nip = vcpu->arch.ivpr | + vcpu->arch.ivor[priority]; if (update_esr == true) kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) @@ -826,7 +829,7 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, vcpu->arch.pc, vcpu->arch.last_inst); + __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. 
*/ run->hw.hardware_exit_reason = ~0ULL << 32; @@ -875,7 +878,7 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) */ vcpu->arch.dbsr = 0; run->debug.arch.status = 0; - run->debug.arch.address = vcpu->arch.pc; + run->debug.arch.address = vcpu->arch.regs.nip; if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; @@ -971,7 +974,7 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, case EMULATE_FAIL: pr_debug("%s: load instruction from guest address %lx failed\n", - __func__, vcpu->arch.pc); + __func__, vcpu->arch.regs.nip); /* For debugging, encode the failing instruction and * report it to userspace. */ run->hw.hardware_exit_reason = ~0ULL << 32; @@ -1169,7 +1172,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_SPE_FP_DATA: case BOOKE_INTERRUPT_SPE_FP_ROUND: printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n", - __func__, exit_nr, vcpu->arch.pc); + __func__, exit_nr, vcpu->arch.regs.nip); run->hw.hardware_exit_reason = exit_nr; r = RESUME_HOST; break; @@ -1299,7 +1302,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } case BOOKE_INTERRUPT_ITLB_MISS: { - unsigned long eaddr = vcpu->arch.pc; + unsigned long eaddr = vcpu->arch.regs.nip; gpa_t gpaddr; gfn_t gfn; int gtlb_index; @@ -1391,7 +1394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int i; int r; - vcpu->arch.pc = 0; + vcpu->arch.regs.nip = 0; vcpu->arch.shared->pir = vcpu->vcpu_id; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ kvmppc_set_msr(vcpu, 0); @@ -1440,10 +1443,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu_load(vcpu); - regs->pc = vcpu->arch.pc; + regs->pc = vcpu->arch.regs.nip; regs->cr = kvmppc_get_cr(vcpu); - regs->ctr = vcpu->arch.ctr; - regs->lr = vcpu->arch.lr; + regs->ctr = vcpu->arch.regs.ctr; + regs->lr = vcpu->arch.regs.link; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; regs->srr0 = kvmppc_get_srr0(vcpu); @@ -1471,10 +1474,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu_load(vcpu); - vcpu->arch.pc = regs->pc; + vcpu->arch.regs.nip = regs->pc; kvmppc_set_cr(vcpu, regs->cr); - vcpu->arch.ctr = regs->ctr; - vcpu->arch.lr = regs->lr; + vcpu->arch.regs.ctr = regs->ctr; + vcpu->arch.regs.link = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); kvmppc_set_srr0(vcpu, regs->srr0); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index a82f64502de1..d23e582f0fee 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -34,19 +34,19 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.shared->srr0; + vcpu->arch.regs.nip = vcpu->arch.shared->srr0; kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.dsrr0; + vcpu->arch.regs.nip = vcpu->arch.dsrr0; kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); } static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.csrr0; + vcpu->arch.regs.nip = vcpu->arch.csrr0; kvmppc_set_msr(vcpu, vcpu->arch.csrr1); } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8f871fb75228..3f8189eb56ed 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -94,7 +94,7 @@ static int 
kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_oc(inst)) { case EHPRIV_OC_DEBUG: run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = vcpu->arch.pc; + run->debug.arch.address = vcpu->arch.regs.nip; run->debug.arch.status = 0; kvmppc_account_exit(vcpu, DEBUG_EXITS); emulated = EMULATE_EXIT_USER; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index ddbf8f0284c0..24296f4cadc6 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -513,7 +513,7 @@ void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) { unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.regs.nip, as); } void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) From f19d1f367a506bc645f8d6695942b8873fc82c84 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 7 May 2018 14:20:09 +0800 Subject: [PATCH 10/63] KVM: PPC: Fix a mmio_host_swabbed uninitialized usage issue When KVM emulates VMX store, it will invoke kvmppc_get_vmx_data() to retrieve VMX reg val. kvmppc_get_vmx_data() will check mmio_host_swabbed to decide which double word of vr[] to be used. But the mmio_host_swabbed can be uninitialized during VMX store procedure: kvmppc_emulate_loadstore \- kvmppc_handle_store128_by2x64 \- kvmppc_get_vmx_data So vcpu->arch.mmio_host_swabbed is not meant to be used at all for emulation of store instructions, and this patch makes that true for VMX stores. This patch also initializes mmio_host_swabbed to avoid possible future problems. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/emulate_loadstore.c | 1 + arch/powerpc/kvm/powerpc.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index a382e15135e6..b8a3aefc3033 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -111,6 +111,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_sp64_extend = 0; vcpu->arch.mmio_sign_extend = 0; vcpu->arch.mmio_vmx_copy_nums = 0; + vcpu->arch.mmio_host_swabbed = 0; switch (get_op(inst)) { case 31: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4e387647b5af..bef27b16d233 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1374,7 +1374,7 @@ static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val) if (di > 1) return -1; - if (vcpu->arch.mmio_host_swabbed) + if (kvmppc_need_byteswap(vcpu)) di = 1 - di; w0 = vrs.u[di * 2]; From b7557451475d747740bc1598045bd273ece80ab0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 16:59:10 +1000 Subject: [PATCH 11/63] KVM: PPC: Book3S HV: Lockless tlbie for HPT hcalls tlbies to an LPAR do not have to be serialised since POWER4/PPC970, after which the MMU_FTR_LOCKLESS_TLBIE feature was introduced to avoid tlbie locking. Since commit c17b98cf6028 ("KVM: PPC: Book3S HV: Remove code for PPC970 processors"), KVM no longer supports processors that do not have this feature, so the tlbie locking can be removed completely. A sanity check for the feature is put in kvmppc_mmu_hv_init. Testing was done on a POWER9 system in HPT mode, with a -smp 32 guest in HPT mode. 32 instances of the powerpc fork benchmark from selftests were run with --fork, and the results measured. 
Without this patch, total throughput was about 13.5K/sec, and this is the top of the host profile: 74.52% [k] do_tlbies 2.95% [k] kvmppc_book3s_hv_page_fault 1.80% [k] calc_checksum 1.80% [k] kvmppc_vcpu_run_hv 1.49% [k] kvmppc_run_core After this patch, throughput was about 51K/sec, with this profile: 21.28% [k] do_tlbies 5.26% [k] kvmppc_run_core 4.88% [k] kvmppc_book3s_hv_page_fault 3.30% [k] _raw_spin_lock_irqsave 3.25% [k] gup_pgd_range Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 +++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 21 --------------------- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8b0ee5e09ea3..89f44ecc4dbd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -269,7 +269,6 @@ struct kvm_arch { unsigned long host_lpcr; unsigned long sdr1; unsigned long host_sdr1; - int tlbie_lock; unsigned long lpcr; unsigned long vrma_slb_v; int mmu_ready; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index a670fa5fbe50..37cd6434d1c8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -272,6 +272,9 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; + if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) + return -EINVAL; + /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ host_lpid = mfspr(SPRN_LPID); rsvd_lpid = LPID_RSVD; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8e12c5c3c4ee..1f22d9e977d4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -435,24 +435,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r) (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); } -static inline int try_lock_tlbie(unsigned int *lock) -{ - unsigned int tmp, old; - unsigned int token = LOCK_TOKEN; - - asm volatile("1:lwarx %1,0,%2\n" - " cmpwi cr0,%1,0\n" - " bne 2f\n" - " stwcx. %3,0,%2\n" - " bne- 1b\n" - " isync\n" - "2:" - : "=&r" (tmp), "=&r" (old) - : "r" (lock), "r" (token) - : "cc", "memory"); - return old == 0; -} - static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { @@ -464,8 +446,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, * the RS field, this is backwards-compatible with P7 and P8. 
*/ if (global) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) { @@ -484,7 +464,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, } asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; } else { if (need_sync) asm volatile("ptesync" : : : "memory"); From a5fad1e959529eda20f38d1e02be65ab629de899 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 17:06:26 +1000 Subject: [PATCH 12/63] KVM: PPC: Book3S HV: Use a helper to unmap ptes in the radix fault path Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 46 +++++++++++++------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 481da8f93fa4..2c49b31ec7fb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -228,6 +228,25 @@ static void kvmppc_pmd_free(pmd_t *pmdp) kmem_cache_free(kvm_pmd_cache, pmdp); } +static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, + unsigned long gpa, unsigned int shift) + +{ + unsigned long page_size = 1ul << shift; + unsigned long old; + + old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift); + kvmppc_radix_tlbie_page(kvm, gpa, shift); + if (old & _PAGE_DIRTY) { + unsigned long gfn = gpa >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + memslot = gfn_to_memslot(kvm, gfn); + if (memslot && memslot->dirty_bitmap) + kvmppc_update_dirty_map(memslot, gfn, page_size); + } +} + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -235,7 +254,6 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, pud_t *pud, *new_pud = NULL; pmd_t *pmd, *new_pmd = NULL; pte_t *ptep, *new_ptep = NULL; - unsigned long old; int ret; /* Traverse the guest's 2nd-level tree, allocate new levels needed */ @@ -287,17 +305,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, goto out_unlock; } /* Valid 1GB page here already, remove it */ - old = kvmppc_radix_update_pte(kvm, (pte_t *)pud, - ~0UL, 0, hgpa, PUD_SHIFT); - kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT); - if (old & _PAGE_DIRTY) { - unsigned long gfn = hgpa >> PAGE_SHIFT; - struct kvm_memory_slot *memslot; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && memslot->dirty_bitmap) - kvmppc_update_dirty_map(memslot, - gfn, PUD_SIZE); - } + kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT); } if (level == 2) { if (!pud_none(*pud)) { @@ -338,17 +346,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, goto out_unlock; } /* Valid 2MB page here already, remove it */ - old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), - ~0UL, 0, lgpa, PMD_SHIFT); - kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT); - if (old & _PAGE_DIRTY) { - unsigned long gfn = lgpa >> PAGE_SHIFT; - struct kvm_memory_slot *memslot; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && memslot->dirty_bitmap) - kvmppc_update_dirty_map(memslot, - gfn, PMD_SIZE); - } + kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT); } if (level == 1) { if (!pmd_none(*pmd)) { @@ -373,6 +371,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, } ptep = pte_offset_kernel(pmd, gpa); if (pte_present(*ptep)) { + unsigned long old; + /* Check if someone else set the same thing 
*/ if (pte_raw(*ptep) == pte_raw(pte)) { ret = 0; From a5704e83aa3d672327409509b2d1bff2def72966 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 17:06:27 +1000 Subject: [PATCH 13/63] KVM: PPC: Book3S HV: Recursively unmap all page table entries when unmapping When partition scope mappings are unmapped with kvm_unmap_radix, the pte is cleared, but the page table structure is left in place. If the next page fault requests a different page table geometry (e.g., due to THP promotion or split), kvmppc_create_pte is responsible for changing the page tables. When a page table entry is to be converted to a large pte, the page table entry is cleared, the PWC flushed, then the page table it points to freed. This will cause pte page tables to leak when a 1GB page is to replace a pud entry points to a pmd table with pte tables under it: The pmd table will be freed, but its pte tables will be missed. Fix this by replacing the simple clear and free code with one that walks down the page tables and frees children. Care must be taken to clear the root entry being unmapped then flushing the PWC before freeing any page tables, as explained in comments. This requires PWC flush to logically become a flush-all-PWC (which it already is in hardware, but the KVM API needs to be changed to avoid confusion). This code also checks that no unexpected pte entries exist in any page table being freed, and unmaps those and emits a WARN. This is an expensive operation for the pte page level, but partition scope changes are rare, so it's unconditional for now to iron out bugs. It can be put under a CONFIG option or removed after some time. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 192 ++++++++++++++++++------- 1 file changed, 138 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 2c49b31ec7fb..e514370ab5ae 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -165,7 +165,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } -static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) +static void kvmppc_radix_flush_pwc(struct kvm *kvm) { unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ @@ -247,6 +247,139 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, } } +/* + * kvmppc_free_p?d are used to free existing page tables, and recursively + * descend and clear and free children. + * Callers are responsible for flushing the PWC. + * + * When page tables are being unmapped/freed as part of page fault path + * (full == false), ptes are not expected. There is code to unmap them + * and emit a warning if encountered, but there may already be data + * corruption due to the unexpected mappings. 
+ */ +static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full) +{ + if (full) { + memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); + } else { + pte_t *p = pte; + unsigned long it; + + for (it = 0; it < PTRS_PER_PTE; ++it, ++p) { + if (pte_val(*p) == 0) + continue; + WARN_ON_ONCE(1); + kvmppc_unmap_pte(kvm, p, + pte_pfn(*p) << PAGE_SHIFT, + PAGE_SHIFT); + } + } + + kvmppc_pte_free(pte); +} + +static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full) +{ + unsigned long im; + pmd_t *p = pmd; + + for (im = 0; im < PTRS_PER_PMD; ++im, ++p) { + if (!pmd_present(*p)) + continue; + if (pmd_is_leaf(*p)) { + if (full) { + pmd_clear(p); + } else { + WARN_ON_ONCE(1); + kvmppc_unmap_pte(kvm, (pte_t *)p, + pte_pfn(*(pte_t *)p) << PAGE_SHIFT, + PMD_SHIFT); + } + } else { + pte_t *pte; + + pte = pte_offset_map(p, 0); + kvmppc_unmap_free_pte(kvm, pte, full); + pmd_clear(p); + } + } + kvmppc_pmd_free(pmd); +} + +static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud) +{ + unsigned long iu; + pud_t *p = pud; + + for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) { + if (!pud_present(*p)) + continue; + if (pud_huge(*p)) { + pud_clear(p); + } else { + pmd_t *pmd; + + pmd = pmd_offset(p, 0); + kvmppc_unmap_free_pmd(kvm, pmd, true); + pud_clear(p); + } + } + pud_free(kvm->mm, pud); +} + +void kvmppc_free_radix(struct kvm *kvm) +{ + unsigned long ig; + pgd_t *pgd; + + if (!kvm->arch.pgtable) + return; + pgd = kvm->arch.pgtable; + for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { + pud_t *pud; + + if (!pgd_present(*pgd)) + continue; + pud = pud_offset(pgd, 0); + kvmppc_unmap_free_pud(kvm, pud); + pgd_clear(pgd); + } + pgd_free(kvm->mm, kvm->arch.pgtable); + kvm->arch.pgtable = NULL; +} + +static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd, + unsigned long gpa) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + + /* + * Clearing the pmd entry then flushing the PWC ensures that the pte + * page no longer be cached by the MMU, so can be freed without + * flushing the PWC again. + */ + pmd_clear(pmd); + kvmppc_radix_flush_pwc(kvm); + + kvmppc_unmap_free_pte(kvm, pte, false); +} + +static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, + unsigned long gpa) +{ + pmd_t *pmd = pmd_offset(pud, 0); + + /* + * Clearing the pud entry then flushing the PWC ensures that the pmd + * page and any children pte pages will no longer be cached by the MMU, + * so can be freed without flushing the PWC again. + */ + pud_clear(pud); + kvmppc_radix_flush_pwc(kvm); + + kvmppc_unmap_free_pmd(kvm, pmd, false); +} + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -312,11 +445,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, /* * There's a page table page here, but we wanted to * install a large page, so remove and free the page - * table page. new_pmd will be NULL since level == 2. + * table page. */ - new_pmd = pmd_offset(pud, 0); - pud_clear(pud); - kvmppc_radix_flush_pwc(kvm, gpa); + kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa); } kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte); ret = 0; @@ -353,11 +484,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, /* * There's a page table page here, but we wanted to * install a large page, so remove and free the page - * table page. new_ptep will be NULL since level == 1. + * table page. 
*/ - new_ptep = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvmppc_radix_flush_pwc(kvm, gpa); + kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa); } kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); ret = 0; @@ -734,51 +863,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm) return 0; } -void kvmppc_free_radix(struct kvm *kvm) -{ - unsigned long ig, iu, im; - pte_t *pte; - pmd_t *pmd; - pud_t *pud; - pgd_t *pgd; - - if (!kvm->arch.pgtable) - return; - pgd = kvm->arch.pgtable; - for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { - if (!pgd_present(*pgd)) - continue; - pud = pud_offset(pgd, 0); - for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { - if (!pud_present(*pud)) - continue; - if (pud_huge(*pud)) { - pud_clear(pud); - continue; - } - pmd = pmd_offset(pud, 0); - for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { - if (pmd_is_leaf(*pmd)) { - pmd_clear(pmd); - continue; - } - if (!pmd_present(*pmd)) - continue; - pte = pte_offset_map(pmd, 0); - memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); - kvmppc_pte_free(pte); - pmd_clear(pmd); - } - kvmppc_pmd_free(pmd_offset(pud, 0)); - pud_clear(pud); - } - pud_free(kvm->mm, pud_offset(pgd, 0)); - pgd_clear(pgd); - } - pgd_free(kvm->mm, kvm->arch.pgtable); - kvm->arch.pgtable = NULL; -} - static void pte_ctor(void *addr) { memset(addr, 0, RADIX_PTE_TABLE_SIZE); From d91cb39ffa7b8af2ed1ff012b95835ff057a6400 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 17:06:28 +1000 Subject: [PATCH 14/63] KVM: PPC: Book3S HV: Make radix use the Linux translation flush functions for partition scope This has the advantage of consolidating TLB flush code in fewer places, and it also implements powerpc:tlbie trace events. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 34 ++++++-------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index e514370ab5ae..e55db915af49 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -139,41 +139,21 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return 0; } -#ifdef CONFIG_PPC_64K_PAGES -#define MMU_BASE_PSIZE MMU_PAGE_64K -#else -#define MMU_BASE_PSIZE MMU_PAGE_4K -#endif - static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, unsigned int pshift) { - int psize = MMU_BASE_PSIZE; + unsigned long psize = PAGE_SIZE; - if (pshift >= PUD_SHIFT) - psize = MMU_PAGE_1G; - else if (pshift >= PMD_SHIFT) - psize = MMU_PAGE_2M; - addr &= ~0xfffUL; - addr |= mmu_psize_defs[psize].ap << 5; - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) - : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) - asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) - : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); + if (pshift) + psize = 1UL << pshift; + + addr &= ~(psize - 1); + radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize); } static void kvmppc_radix_flush_pwc(struct kvm *kvm) { - unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - - asm volatile("ptesync": : :"memory"); - /* RIC=1 PRS=0 R=1 IS=2 */ - asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1) - : : "r" (rb), "r" (kvm->arch.lpid) : "memory"); - asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); + radix__flush_pwc_lpid(kvm->arch.lpid); } unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, From 
9a4506e11b9717db2e03c8eedc14d2baaf78b66b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 17:06:29 +1000 Subject: [PATCH 15/63] KVM: PPC: Book3S HV: Make radix handle process scoped LPID flush in C, with relocation on The radix guest code can has fewer restrictions about what context it can run in, so move this flushing out of assembly and have it use the Linux TLB flush implementations introduced previously. This allows powerpc:tlbie trace events to be used. This changes the tlbiel sequence to only execute RIC=2 flush once on the first set flushed, then RIC=0 for the rest of the sets. The end result of the flush should be unchanged. This matches the local PID flush pattern that was introduced in a5998fcb92 ("powerpc/mm/radix: Optimise tlbiel flush all case"). Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 26 +++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 13 ++++++------- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 336e3468e700..9b6a118ea771 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2958,6 +2958,32 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) for (sub = 0; sub < core_info.n_subcores; ++sub) spin_unlock(&core_info.vc[sub]->lock); + if (kvm_is_radix(vc->kvm)) { + int tmp = pcpu; + + /* + * Do we need to flush the process scoped TLB for the LPAR? + * + * On POWER9, individual threads can come in here, but the + * TLB is shared between the 4 threads in a core, hence + * invalidating on one thread invalidates for all. + * Thus we make all 4 threads use the same bit here. + * + * Hash must be flushed in realmode in order to use tlbiel. + */ + mtspr(SPRN_LPID, vc->kvm->arch.lpid); + isync(); + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + tmp &= ~0x3UL; + + if (cpumask_test_cpu(tmp, &vc->kvm->arch.need_tlb_flush)) { + radix__local_flush_tlb_lpid_guest(vc->kvm->arch.lpid); + /* Clear the bit after the TLB flush */ + cpumask_clear_cpu(tmp, &vc->kvm->arch.need_tlb_flush); + } + } + /* * Interrupts will be enabled once we get into the guest, * so tell lockdep that we're about to enable interrupts. diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 07ca1b2a7966..ef9e665fc8e2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -636,6 +636,10 @@ kvmppc_hv_entry: /* Primary thread switches to guest partition. */ cmpwi r6,0 bne 10f + + /* Radix has already switched LPID and flushed core TLB */ + bne cr7, 22f + lwz r7,KVM_LPID(r9) BEGIN_FTR_SECTION ld r6,KVM_SDR1(r9) @@ -647,7 +651,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync - /* See if we need to flush the TLB */ + /* See if we need to flush the TLB. Hash has to be done in RM */ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ BEGIN_FTR_SECTION /* @@ -674,15 +678,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) li r7,0x800 /* IS field = 0b10 */ ptesync li r0,0 /* RS for P9 version of tlbiel */ - bne cr7, 29f 28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ addi r7,r7,0x1000 bdnz 28b - b 30f -29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */ - addi r7,r7,0x1000 - bdnz 29b -30: ptesync + ptesync 23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ andc r7,r7,r8 stdcx. 
r7,0,r6 From bc64dd0e1c4eddbec75dd5aa86b60c2a834aaef3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 17:06:30 +1000 Subject: [PATCH 16/63] KVM: PPC: Book3S HV: radix: Refine IO region partition scope attributes When the radix fault handler has no page from the process address space (e.g., for IO memory), it looks up the process pte and sets partition table pte using that to get attributes like CI and guarded. If the process table entry is to be writable, set _PAGE_DIRTY as well to avoid an RC update. If not, then ensure _PAGE_DIRTY does not come across. Set _PAGE_ACCESSED as well to avoid RC update. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index e55db915af49..b0ba3628adc2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -674,9 +674,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long mask = (1ul << shift) - PAGE_SIZE; pte = __pte(pte_val(pte) | (hva & mask)); } - if (!(writing || upgrade_write)) - pte = __pte(pte_val(pte) & ~ _PAGE_WRITE); - pte = __pte(pte_val(pte) | _PAGE_EXEC); + pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); + if (writing || upgrade_write) { + if (pte_val(pte) & _PAGE_WRITE) + pte = __pte(pte_val(pte) | _PAGE_DIRTY); + } else { + pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + } } /* Allocate space in the tree and write the PTE */ From 878cf2bb2d8d6164df7b63b2239859f99fea212a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 May 2018 17:06:31 +1000 Subject: [PATCH 17/63] KVM: PPC: Book3S HV: radix: Do not clear partition PTE when RC or write bits do not match Adding the write bit and RC bits to pte permissions does not require a pte clear and flush. There should not be other bits changed here, because restricting access or changing the PFN must have already invalidated any existing ptes (otherwise the race is already lost). Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 68 ++++++++++++++++++-------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index b0ba3628adc2..176f911ee983 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -156,7 +156,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm) radix__flush_pwc_lpid(kvm->arch.lpid); } -unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, +static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, unsigned long clr, unsigned long set, unsigned long addr, unsigned int shift) { @@ -360,6 +360,15 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, kvmppc_unmap_free_pmd(kvm, pmd, false); } +/* + * There are a number of bits which may differ between different faults to + * the same partition scope entry. RC bits, in the course of cleaning and + * aging. And the write bit can change, either the access could have been + * upgraded, or a read fault could happen concurrently with a write fault + * that sets those bits first. 
+ */ +#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)) + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -404,19 +413,28 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, if (pud_huge(*pud)) { unsigned long hgpa = gpa & PUD_MASK; + /* Check if we raced and someone else has set the same thing */ + if (level == 2) { + if (pud_raw(*pud) == pte_raw(pte)) { + ret = 0; + goto out_unlock; + } + /* Valid 1GB page here already, add our extra bits */ + WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) & + PTE_BITS_MUST_MATCH); + kvmppc_radix_update_pte(kvm, (pte_t *)pud, + 0, pte_val(pte), hgpa, PUD_SHIFT); + ret = 0; + goto out_unlock; + } /* * If we raced with another CPU which has just put * a 1GB pte in after we saw a pmd page, try again. */ - if (level <= 1 && !new_pmd) { + if (!new_pmd) { ret = -EAGAIN; goto out_unlock; } - /* Check if we raced and someone else has set the same thing */ - if (level == 2 && pud_raw(*pud) == pte_raw(pte)) { - ret = 0; - goto out_unlock; - } /* Valid 1GB page here already, remove it */ kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT); } @@ -443,19 +461,29 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, if (pmd_is_leaf(*pmd)) { unsigned long lgpa = gpa & PMD_MASK; + /* Check if we raced and someone else has set the same thing */ + if (level == 1) { + if (pmd_raw(*pmd) == pte_raw(pte)) { + ret = 0; + goto out_unlock; + } + /* Valid 2MB page here already, add our extra bits */ + WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) & + PTE_BITS_MUST_MATCH); + kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), + 0, pte_val(pte), lgpa, PMD_SHIFT); + ret = 0; + goto out_unlock; + } + /* * If we raced with another CPU which has just put * a 2MB pte in after we saw a pte page, try again. */ - if (level == 0 && !new_ptep) { + if (!new_ptep) { ret = -EAGAIN; goto out_unlock; } - /* Check if we raced and someone else has set the same thing */ - if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) { - ret = 0; - goto out_unlock; - } /* Valid 2MB page here already, remove it */ kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT); } @@ -480,19 +508,17 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, } ptep = pte_offset_kernel(pmd, gpa); if (pte_present(*ptep)) { - unsigned long old; - /* Check if someone else set the same thing */ if (pte_raw(*ptep) == pte_raw(pte)) { ret = 0; goto out_unlock; } - /* PTE was previously valid, so invalidate it */ - old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, - 0, gpa, 0); - kvmppc_radix_tlbie_page(kvm, gpa, 0); - if (old & _PAGE_DIRTY) - mark_page_dirty(kvm, gpa >> PAGE_SHIFT); + /* Valid page here already, add our extra bits */ + WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) & + PTE_BITS_MUST_MATCH); + kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0); + ret = 0; + goto out_unlock; } kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); ret = 0; From eadce3b48b5a8ffec7c8abbd4950a501c91d2515 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 May 2018 03:49:43 +1000 Subject: [PATCH 18/63] KVM: PPC: Book3S HV: Fix kvmppc_bad_host_intr for real mode interrupts When CONFIG_RELOCATABLE=n, the Linux real mode interrupt handlers call into KVM using real address. This needs to be translated to the kernel linear effective address before the MMU is switched on. 
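For illustration only (a sketch, not part of the patch): the translation amounts to forcing the top two address bits on, so that a real-mode address becomes the corresponding kernel linear-mapping address at 0xc000000000000000. A C equivalent of the rldimi fixup added below would be:

	static inline unsigned long real_to_linear(unsigned long ra)
	{
		/* set the top two bits, i.e. OR in 0xc000000000000000 */
		return ra | 0xc000000000000000UL;
	}
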
kvmppc_bad_host_intr misses adding these bits, so when it is used to handle a system reset interrupt (that always gets delivered in real mode), it results in an instruction access fault immediately after the MMU is turned on. Fix this by ensuring the top 2 address bits are set when the MMU is turned on. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ef9e665fc8e2..5e6e493e065e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -3568,6 +3568,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) bcl 20, 31, .+4 5: mflr r3 addi r3, r3, 9f - 5b + li r4, -1 + rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */ ld r4, PACAKMSR(r13) mtspr SPRN_SRR0, r3 mtspr SPRN_SRR1, r4 From 7c1bd80cc216e7255bfabb94222676b51ab6868e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 May 2018 03:49:44 +1000 Subject: [PATCH 19/63] KVM: PPC: Book3S HV: Send kvmppc_bad_interrupt NMIs to Linux handlers It's possible to take a SRESET or MCE in these paths due to a bug in the host code or a NMI IPI, etc. A recent bug attempting to load a virtual address from real mode gave th complete but cryptic error, abridged: Oops: Bad interrupt in KVM entry/exit code, sig: 6 [#1] LE SMP NR_CPUS=2048 NUMA PowerNV CPU: 53 PID: 6582 Comm: qemu-system-ppc Not tainted NIP: c0000000000155ac LR: c0000000000c2430 CTR: c000000000015580 REGS: c000000fff76dd80 TRAP: 0200 Not tainted MSR: 9000000000201003 CR: 48082222 XER: 00000000 CFAR: 0000000102900ef0 DAR: d00017fffd941a28 DSISR: 00000040 SOFTE: 3 NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0 LR [c0000000000c2430] do_tlbies+0x230/0x2f0 Sending the NMIs through the Linux handlers gives a nicer output: Severe Machine check interrupt [Not recovered] NIP [c0000000000155ac]: perf_trace_tlbie+0x2c/0x1a0 Initiator: CPU Error type: Real address [Load (bad)] Effective address: d00017fffcc01a28 opal: Machine check interrupt unrecoverable: MSR(RI=0) opal: Hardware platform error: Unrecoverable Machine Check exception CPU: 0 PID: 6700 Comm: qemu-system-ppc Tainted: G M NIP: c0000000000155ac LR: c0000000000c23c0 CTR: c000000000015580 REGS: c000000fff9e9d80 TRAP: 0200 Tainted: G M MSR: 9000000000201001 CR: 48082222 XER: 00000000 CFAR: 000000010cbc1a30 DAR: d00017fffcc01a28 DSISR: 00000040 SOFTE: 3 NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0 LR [c0000000000c23c0] do_tlbies+0x1c0/0x280 Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_builtin.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 2b127586be30..d4a3f4da409b 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -633,7 +634,19 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) void kvmppc_bad_interrupt(struct pt_regs *regs) { - die("Bad interrupt in KVM entry/exit code", regs, SIGABRT); + /* + * 100 could happen at any time, 200 can happen due to invalid real + * address access for example (or any time due to a hardware problem). 
+ */ + if (TRAP(regs) == 0x100) { + get_paca()->in_nmi++; + system_reset_exception(regs); + get_paca()->in_nmi--; + } else if (TRAP(regs) == 0x200) { + machine_check_exception(regs); + } else { + die("Bad interrupt in KVM entry/exit code", regs, SIGABRT); + } panic("Bad KVM trap"); } From ec531d027ab29b0cfa1c80c8af561b0e74bd4283 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 May 2018 21:49:28 +1000 Subject: [PATCH 20/63] KVM: PPC: Book3S PR: Enable use on POWER9 inside HPT-mode guests This relaxes the restriction on using PR KVM on POWER9. The existing code does work inside a guest partition running in HPT mode, because hypercalls such as H_ENTER use the old HPTE format, not the new format used by POWER9, and so no change to PR KVM's HPT manipulation code is required. PR KVM will still refuse to run if the kernel is using radix translation or if it is running bare-metal. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 67061d399cd9..3d0251edc13c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1735,9 +1735,16 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) static int kvmppc_core_check_processor_compat_pr(void) { /* - * Disable KVM for Power9 untill the required bits merged. + * PR KVM can work on POWER9 inside a guest partition + * running in HPT mode. It can't work if we are using + * radix translation (because radix provides no way for + * a process to have unique translations in quadrant 3) + * or in a bare-metal HPT-mode host (because POWER9 + * uses a modified HPTE format which the PR KVM code + * has not been adapted to use). */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (radix_enabled() || cpu_has_feature(CPU_FTR_HVMODE))) return -EIO; return 0; } From 94dd7fa1c0b75e909fa54d86ce2d1aaf2c54ceb7 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:20 +0800 Subject: [PATCH 21/63] KVM: PPC: Add KVMPPC_VSX_COPY_WORD_LOAD_DUMP type support for mmio emulation Some VSX instructions like lxvwsx will splat word into VSR. This patch adds a new VSX copy type KVMPPC_VSX_COPY_WORD_LOAD_DUMP to support this. 
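For illustration only (the real handling is the kvmppc_set_vsr_word_dump() helper added in the diff below), a word "load dump" replicates the single 32-bit value read from MMIO into every word element of the 128-bit destination register:

	/* Sketch, treating the destination simply as four 32-bit words. */
	union vsr_words { u32 w[4]; };

	static void splat_word(union vsr_words *dst, u32 val)
	{
		int i;

		for (i = 0; i < 4; i++)
			dst->w[i] = val;
	}
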
Signed-off-by: Simon Guo Reviewed-by: Paul Mackerras Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/powerpc.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 89f44ecc4dbd..4bade292892f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -453,6 +453,7 @@ struct mmio_hpte_cache { #define KVMPPC_VSX_COPY_WORD 1 #define KVMPPC_VSX_COPY_DWORD 2 #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3 +#define KVMPPC_VSX_COPY_WORD_LOAD_DUMP 4 struct openpic; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index bef27b16d233..45daf3b9d9d2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -907,6 +907,26 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, } } +static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu, + u32 gpr) +{ + union kvmppc_one_reg val; + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (vcpu->arch.mmio_vsx_tx_sx_enabled) { + val.vsx32val[0] = gpr; + val.vsx32val[1] = gpr; + val.vsx32val[2] = gpr; + val.vsx32val[3] = gpr; + VCPU_VSX_VR(vcpu, index) = val.vval; + } else { + val.vsx32val[0] = gpr; + val.vsx32val[1] = gpr; + VCPU_VSX_FPR(vcpu, index, 0) = val.vsxval[0]; + VCPU_VSX_FPR(vcpu, index, 1) = val.vsxval[0]; + } +} + static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, u32 gpr32) { @@ -1061,6 +1081,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD_LOAD_DUMP) kvmppc_set_vsr_dword_dump(vcpu, gpr); + else if (vcpu->arch.mmio_vsx_copy_type == + KVMPPC_VSX_COPY_WORD_LOAD_DUMP) + kvmppc_set_vsr_word_dump(vcpu, gpr); break; #endif #ifdef CONFIG_ALTIVEC From 7092360399644ad4b12ac573c1996536b9e9b4b6 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:21 +0800 Subject: [PATCH 22/63] KVM: PPC: Reimplement non-SIMD LOAD/STORE instruction mmio emulation with analyse_instr() input This patch reimplements non-SIMD LOAD/STORE instruction MMIO emulation with analyse_instr() input. It utilizes the BYTEREV/UPDATE/SIGNEXT properties exported by analyse_instr() and invokes kvmppc_handle_load(s)/kvmppc_handle_store() accordingly. It also moves CACHEOP type handling into the skeleton. instruction_type within kvm_ppc.h is renamed to avoid conflict with sstep.h. 
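To make the flag semantics concrete, here is a standalone sketch (not the kernel code) of what BYTEREV and SIGNEXT mean for a 2-byte load value; in the patch itself this is expressed by choosing kvmppc_handle_loads() vs. kvmppc_handle_load() and by the is_default_endian argument:

	#include <stdint.h>

	static uint64_t finish_load16(uint16_t raw, int byterev, int signext)
	{
		/* BYTEREV: the bytes of the value are swapped (lhbrx-style) */
		uint16_t v = byterev ? (uint16_t)((raw >> 8) | (raw << 8)) : raw;

		/* SIGNEXT: sign-extend into the 64-bit GPR (lhax-style),
		 * otherwise zero-extend (lhzx-style).
		 */
		return signext ? (uint64_t)(int64_t)(int16_t)v : (uint64_t)v;
	}
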
Suggested-by: Paul Mackerras Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 6 +- arch/powerpc/kvm/book3s.c | 4 +- arch/powerpc/kvm/e500_mmu_host.c | 8 +- arch/powerpc/kvm/emulate_loadstore.c | 273 +++++---------------------- 4 files changed, 52 insertions(+), 239 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index abe7032cdb54..139cdf0abf90 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,7 +52,7 @@ enum emulation_result { EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; -enum instruction_type { +enum instruction_fetch_type { INST_GENERIC, INST_SC, /* system call */ }; @@ -93,7 +93,7 @@ extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, int is_default_endian); extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, - enum instruction_type type, u32 *inst); + enum instruction_fetch_type type, u32 *inst); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); @@ -330,7 +330,7 @@ extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, - enum instruction_type type, u32 *inst) + enum instruction_fetch_type type, u32 *inst) { int ret = EMULATE_DONE; u32 fetched_inst; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 97d4a112648f..320cdcf84591 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -450,8 +450,8 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, return r; } -int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, - u32 *inst) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_fetch_type type, u32 *inst) { ulong pc = kvmppc_get_pc(vcpu); int r; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index c878b4ffb86f..8f2985e46f6f 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -625,8 +625,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } #ifdef CONFIG_KVM_BOOKE_HV -int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, - u32 *instr) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_fetch_type type, u32 *instr) { gva_t geaddr; hpa_t addr; @@ -715,8 +715,8 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, return EMULATE_DONE; } #else -int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, - u32 *instr) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_fetch_type type, u32 *instr) { return EMULATE_AGAIN; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index b8a3aefc3033..af7c71a0ae6f 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "timing.h" #include "trace.h" @@ -84,8 +85,9 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; u32 inst; int ra, rs, rt; - enum emulation_result emulated; + enum emulation_result emulated = EMULATE_FAIL; int advance = 1; + struct instruction_op op; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); @@ -113,144 +115,61 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) 
vcpu->arch.mmio_vmx_copy_nums = 0; vcpu->arch.mmio_host_swabbed = 0; - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; + emulated = EMULATE_FAIL; + vcpu->arch.regs.msr = vcpu->arch.shared->msr; + vcpu->arch.regs.ccr = vcpu->arch.cr; + if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { + int type = op.type & INSTR_TYPE_MASK; + int size = GETSIZE(op.type); - case OP_31_XOP_LWZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; + switch (type) { + case LOAD: { + int instr_byte_swap = op.type & BYTEREV; - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; + if (op.type & SIGNEXT) + emulated = kvmppc_handle_loads(run, vcpu, + op.reg, size, !instr_byte_swap); + else + emulated = kvmppc_handle_load(run, vcpu, + op.reg, size, !instr_byte_swap); - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); - case OP_31_XOP_STDX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); break; + } + case STORE: + /* if need byte reverse, op.val has been reversed by + * analyse_instr(). + */ + emulated = kvmppc_handle_store(run, vcpu, op.val, + size, 1); + + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); - case OP_31_XOP_STDUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; - - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 1); - break; - - case OP_31_XOP_STWUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHAUX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: + case CACHEOP: /* Do nothing. The guest is performing dcbi because * hardware DMA is not snooped by the dcache, but * emulated DMA either goes through the dcache as * normal writes, or the host kernel has handled dcache - * coherence. */ + * coherence. 
+ */ + emulated = EMULATE_DONE; break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + default: break; + } + } - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 0); - break; - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 0); - break; - - case OP_31_XOP_LDBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0); - break; - - case OP_31_XOP_STDBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 0); - break; - - case OP_31_XOP_LDX: - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_31_XOP_LDUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LWAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LWAUX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; + if ((emulated == EMULATE_DONE) || (emulated == EMULATE_DO_MMIO)) + goto out; + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { #ifdef CONFIG_PPC_FPU case OP_31_XOP_LFSX: if (kvmppc_check_fp_disabled(vcpu)) @@ -502,10 +421,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } break; - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - #ifdef CONFIG_PPC_FPU case OP_STFS: if (kvmppc_check_fp_disabled(vcpu)) @@ -542,110 +457,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) 8, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; -#endif - case OP_LD: - rt = get_rt(inst); - switch (inst & 3) { - case 0: /* ld */ - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - case 1: /* ldu */ - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case 2: /* lwa */ - emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); - break; - default: - emulated = EMULATE_FAIL; - } - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_STD: - rs = get_rs(inst); - switch (inst & 3) { - case 0: /* std */ - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - break; - case 1: /* stdu */ - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - default: - emulated = EMULATE_FAIL; - } - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, 
rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - break; - - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - -#ifdef CONFIG_PPC_FPU case OP_LFS: if (kvmppc_check_fp_disabled(vcpu)) return EMULATE_DONE; @@ -684,6 +496,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) break; } +out: if (emulated == EMULATE_FAIL) { advance = 0; kvmppc_core_queue_program(vcpu, 0); From 2e6baa46b4ae785e3e954aaf9d2e8a0bb06ad33a Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:22 +0800 Subject: [PATCH 23/63] KVM: PPC: Add giveup_ext() hook to PPC KVM ops Currently HV will save math regs(FP/VEC/VSX) when trap into host. But PR KVM will only save math regs when qemu task switch out of CPU, or when returning from qemu code. To emulate FP/VEC/VSX mmio load, PR KVM need to make sure that math regs were flushed firstly and then be able to update saved VCPU FPR/VEC/VSX area reasonably. This patch adds giveup_ext() field to KVM ops. Only PR KVM has non-NULL giveup_ext() ops. kvmppc_complete_mmio_load() can invoke that hook (when not NULL) to flush math regs accordingly, before updating saved register vals. Math regs flush is also necessary for STORE, which will be covered in later patch within this patch series. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/kvm/powerpc.c | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 139cdf0abf90..1f087c4cb386 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -324,6 +324,7 @@ struct kvmppc_ops { int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); int (*set_smt_mode)(struct kvm *kvm, unsigned long mode, unsigned long flags); + void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3d0251edc13c..c74a8885427d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1789,6 +1789,7 @@ static struct kvmppc_ops kvm_ops_pr = { #ifdef CONFIG_PPC_BOOK3S_64 .hcall_implemented = kvmppc_hcall_impl_pr, #endif + .giveup_ext = kvmppc_giveup_ext, }; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 45daf3b9d9d2..8ce9e7ba5fed 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1061,6 +1061,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); break; case KVM_MMIO_REG_FPR: + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_FP); + VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; break; #ifdef CONFIG_PPC_BOOK3S @@ -1074,6 +1077,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, #endif #ifdef CONFIG_VSX case KVM_MMIO_REG_VSX: + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX); + if (vcpu->arch.mmio_vsx_copy_type == 
KVMPPC_VSX_COPY_DWORD) kvmppc_set_vsr_dword(vcpu, gpr); else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD) @@ -1088,6 +1094,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, #endif #ifdef CONFIG_ALTIVEC case KVM_MMIO_REG_VMX: + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC); + kvmppc_set_vmx_dword(vcpu, gpr); break; #endif From 2b33cb585f940180b4c527f0de46ff226a9d25ab Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:23 +0800 Subject: [PATCH 24/63] KVM: PPC: Reimplement LOAD_FP/STORE_FP instruction mmio emulation with analyse_instr() input This patch reimplements LOAD_FP/STORE_FP instruction MMIO emulation with analyse_instr() input. It utilizes the FPCONV/UPDATE properties exported by analyse_instr() and invokes kvmppc_handle_load(s)/kvmppc_handle_store() accordingly. For FP store MMIO emulation, the FP regs need to be flushed firstly so that the right FP reg vals can be read from vcpu->arch.fpr, which will be stored into MMIO data. Suggested-by: Paul Mackerras Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/emulate_loadstore.c | 201 ++++++--------------------- 1 file changed, 44 insertions(+), 157 deletions(-) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index af7c71a0ae6f..82f13c17e055 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -138,6 +138,26 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) break; } +#ifdef CONFIG_PPC_FPU + case LOAD_FP: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + + if (op.type & FPCONV) + vcpu->arch.mmio_sp64_extend = 1; + + if (op.type & SIGNEXT) + emulated = kvmppc_handle_loads(run, vcpu, + KVM_MMIO_REG_FPR|op.reg, size, 1); + else + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|op.reg, size, 1); + + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + + break; +#endif case STORE: /* if need byte reverse, op.val has been reversed by * analyse_instr(). @@ -149,6 +169,30 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, op.update_reg, op.ea); break; +#ifdef CONFIG_PPC_FPU + case STORE_FP: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + + /* The FP registers need to be flushed so that + * kvmppc_handle_store() can read actual FP vals + * from vcpu->arch. + */ + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, + MSR_FP); + + if (op.type & FPCONV) + vcpu->arch.mmio_sp64_extend = 1; + + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, op.reg), size, 1); + + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + + break; +#endif case CACHEOP: /* Do nothing. 
The guest is performing dcbi because * hardware DMA is not snooped by the dcache, but @@ -170,93 +214,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) switch (get_op(inst)) { case 31: switch (get_xop(inst)) { -#ifdef CONFIG_PPC_FPU - case OP_31_XOP_LFSX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; - - case OP_31_XOP_LFSUX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LFDX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - break; - - case OP_31_XOP_LFDUX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LFIWAX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_loads(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; - - case OP_31_XOP_LFIWZX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; - - case OP_31_XOP_STFSX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 4, 1); - break; - - case OP_31_XOP_STFSUX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STFDX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 8, 1); - break; - - case OP_31_XOP_STFDUX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STFIWX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 4, 1); - break; -#endif - #ifdef CONFIG_VSX case OP_31_XOP_LXSDX: if (kvmppc_check_vsx_disabled(vcpu)) @@ -421,76 +378,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } break; -#ifdef CONFIG_PPC_FPU - case OP_STFS: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 4, 1); - break; - - case OP_STFSU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STFD: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 8, 1); - break; - - case OP_STFDU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LFS: - if (kvmppc_check_fp_disabled(vcpu)) - return 
EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; - - case OP_LFSU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LFD: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - break; - - case OP_LFDU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; -#endif - default: emulated = EMULATE_FAIL; break; From b01c78c297da45500c18adb99bcc1e08d96768d5 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:24 +0800 Subject: [PATCH 25/63] KVM: PPC: Reimplement LOAD_VSX/STORE_VSX instruction mmio emulation with analyse_instr() input This patch reimplements LOAD_VSX/STORE_VSX instruction MMIO emulation with analyse_instr() input. It utilizes VSX_FPCONV/VSX_SPLAT/SIGNEXT exported by analyse_instr() and handle accordingly. When emulating VSX store, the VSX reg will need to be flushed so that the right reg val can be retrieved before writing to IO MEM. [paulus@ozlabs.org - mask the register number to 5 bits.] Suggested-by: Paul Mackerras Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/emulate_loadstore.c | 227 +++++++++++---------------- 1 file changed, 91 insertions(+), 136 deletions(-) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 82f13c17e055..6dcec74ab249 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -157,6 +157,54 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, op.update_reg, op.ea); break; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: { + int io_size_each; + + if (op.vsx_flags & VSX_CHECK_VEC) { + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + } else { + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + } + + if (op.vsx_flags & VSX_FPCONV) + vcpu->arch.mmio_sp64_extend = 1; + + if (op.element_size == 8) { + if (op.vsx_flags & VSX_SPLAT) + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; + else + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_DWORD; + } else if (op.element_size == 4) { + if (op.vsx_flags & VSX_SPLAT) + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_WORD_LOAD_DUMP; + else + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_WORD; + } else + break; + + if (size < op.element_size) { + /* precision convert case: lxsspx, etc */ + vcpu->arch.mmio_vsx_copy_nums = 1; + io_size_each = size; + } else { /* lxvw4x, lxvd2x, etc */ + vcpu->arch.mmio_vsx_copy_nums = + size/op.element_size; + io_size_each = op.element_size; + } + + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX | (op.reg & 0x1f), + io_size_each, 1, op.type & SIGNEXT); + break; + } #endif case STORE: /* if need byte reverse, op.val has been reversed by @@ -192,6 +240,49 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, op.update_reg, op.ea); break; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: { + int io_size_each; + + if (op.vsx_flags & VSX_CHECK_VEC) { + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + } else { + if (kvmppc_check_vsx_disabled(vcpu)) + 
return EMULATE_DONE; + } + + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, + MSR_VSX); + + if (op.vsx_flags & VSX_FPCONV) + vcpu->arch.mmio_sp64_extend = 1; + + if (op.element_size == 8) + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_DWORD; + else if (op.element_size == 4) + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_WORD; + else + break; + + if (size < op.element_size) { + /* precise conversion case, like stxsspx */ + vcpu->arch.mmio_vsx_copy_nums = 1; + io_size_each = size; + } else { /* stxvw4x, stxvd2x, etc */ + vcpu->arch.mmio_vsx_copy_nums = + size/op.element_size; + io_size_each = op.element_size; + } + + emulated = kvmppc_handle_vsx_store(run, vcpu, + op.reg & 0x1f, io_size_each, 1); + break; + } #endif case CACHEOP: /* Do nothing. The guest is performing dcbi because @@ -214,142 +305,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) switch (get_op(inst)) { case 31: switch (get_xop(inst)) { -#ifdef CONFIG_VSX - case OP_31_XOP_LXSDX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 8, 1, 0); - break; - - case OP_31_XOP_LXSSPX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 0); - break; - - case OP_31_XOP_LXSIWAX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 1); - break; - - case OP_31_XOP_LXSIWZX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 0); - break; - - case OP_31_XOP_LXVD2X: - /* - * In this case, the official load/store process is like this: - * Step1, exit from vm by page fault isr, then kvm save vsr. - * Please see guest_exit_cont->store_fp_state->SAVE_32VSRS - * as reference. - * - * Step2, copy data between memory and VCPU - * Notice: for LXVD2X/STXVD2X/LXVW4X/STXVW4X, we use - * 2copies*8bytes or 4copies*4bytes - * to simulate one copy of 16bytes. - * Also there is an endian issue here, we should notice the - * layout of memory. - * Please see MARCO of LXVD2X_ROT/STXVD2X_ROT as more reference. - * If host is little-endian, kvm will call XXSWAPD for - * LXVD2X_ROT/STXVD2X_ROT. - * So, if host is little-endian, - * the postion of memeory should be swapped. - * - * Step3, return to guest, kvm reset register. - * Please see kvmppc_hv_entry->load_fp_state->REST_32VSRS - * as reference. 
- */ - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 2; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 8, 1, 0); - break; - - case OP_31_XOP_LXVW4X: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 4; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 0); - break; - - case OP_31_XOP_LXVDSX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = - KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 8, 1, 0); - break; - - case OP_31_XOP_STXSDX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 8, 1); - break; - - case OP_31_XOP_STXSSPX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 4, 1); - break; - - case OP_31_XOP_STXSIWX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_offset = 1; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 4, 1); - break; - - case OP_31_XOP_STXVD2X: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 2; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 8, 1); - break; - - case OP_31_XOP_STXVW4X: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 4; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 4, 1); - break; -#endif /* CONFIG_VSX */ - #ifdef CONFIG_ALTIVEC case OP_31_XOP_LVX: if (kvmppc_check_altivec_disabled(vcpu)) From da2a32b876e979d74f84746ae8d066e1d54b568f Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:25 +0800 Subject: [PATCH 26/63] KVM: PPC: Expand mmio_vsx_copy_type to cover VMX load/store element types VSX MMIO emulation uses mmio_vsx_copy_type to represent VSX emulated element size/type, such as KVMPPC_VSX_COPY_DWORD_LOAD, etc. This patch expands mmio_vsx_copy_type to cover VMX copy type, such as KVMPPC_VMX_COPY_BYTE(stvebx/lvebx), etc. As a result, mmio_vsx_copy_type is also renamed to mmio_copy_type. It is a preparation for reimplementing VMX MMIO emulation. 
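For illustration, the unified field ends up carrying both families of values; the minimal helper below (not part of this patch, and the function name is made up here) shows the element size each mmio_copy_type value implies, assuming the KVMPPC_VSX_COPY_* and KVMPPC_VMX_COPY_* constants used in this series:

/* Illustrative only: map a unified mmio_copy_type value to the number
 * of bytes that a single copy operation of that type moves. */
static int mmio_copy_type_elem_size(u8 copy_type)
{
	switch (copy_type) {
	case KVMPPC_VMX_COPY_BYTE:
		return 1;
	case KVMPPC_VMX_COPY_HWORD:
		return 2;
	case KVMPPC_VMX_COPY_WORD:
	case KVMPPC_VSX_COPY_WORD:
	case KVMPPC_VSX_COPY_WORD_LOAD_DUMP:
		return 4;
	case KVMPPC_VMX_COPY_DWORD:
	case KVMPPC_VSX_COPY_DWORD:
	case KVMPPC_VSX_COPY_DWORD_LOAD_DUMP:
		return 8;
	default:
		return 0;	/* KVMPPC_VSX_COPY_NONE or unrecognized */
	}
}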
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 9 +++++++-- arch/powerpc/kvm/emulate_loadstore.c | 14 +++++++------- arch/powerpc/kvm/powerpc.c | 10 +++++----- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4bade292892f..fe506c86404a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -455,6 +455,11 @@ struct mmio_hpte_cache { #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3 #define KVMPPC_VSX_COPY_WORD_LOAD_DUMP 4 +#define KVMPPC_VMX_COPY_BYTE 8 +#define KVMPPC_VMX_COPY_HWORD 9 +#define KVMPPC_VMX_COPY_WORD 10 +#define KVMPPC_VMX_COPY_DWORD 11 + struct openpic; /* W0 and W1 of a XIVE thread management context */ @@ -677,16 +682,16 @@ struct kvm_vcpu_arch { * Number of simulations for vsx. * If we use 2*8bytes to simulate 1*16bytes, * then the number should be 2 and - * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD. + * mmio_copy_type=KVMPPC_VSX_COPY_DWORD. * If we use 4*4bytes to simulate 1*16bytes, * the number should be 4 and * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD. */ u8 mmio_vsx_copy_nums; u8 mmio_vsx_offset; - u8 mmio_vsx_copy_type; u8 mmio_vsx_tx_sx_enabled; u8 mmio_vmx_copy_nums; + u8 mmio_copy_type; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 6dcec74ab249..324cfbfa6fa2 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -109,7 +109,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst); vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE; + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; vcpu->arch.mmio_sp64_extend = 0; vcpu->arch.mmio_sign_extend = 0; vcpu->arch.mmio_vmx_copy_nums = 0; @@ -175,17 +175,17 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (op.element_size == 8) { if (op.vsx_flags & VSX_SPLAT) - vcpu->arch.mmio_vsx_copy_type = + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; else - vcpu->arch.mmio_vsx_copy_type = + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_DWORD; } else if (op.element_size == 4) { if (op.vsx_flags & VSX_SPLAT) - vcpu->arch.mmio_vsx_copy_type = + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_WORD_LOAD_DUMP; else - vcpu->arch.mmio_vsx_copy_type = + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_WORD; } else break; @@ -261,10 +261,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_sp64_extend = 1; if (op.element_size == 8) - vcpu->arch.mmio_vsx_copy_type = + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_DWORD; else if (op.element_size == 4) - vcpu->arch.mmio_vsx_copy_type = + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_WORD; else break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8ce9e7ba5fed..1580bd24bc74 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1080,14 +1080,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, if (vcpu->kvm->arch.kvm_ops->giveup_ext) vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX); - if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD) + if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD) kvmppc_set_vsr_dword(vcpu, gpr); - else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD) + else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_WORD) 
kvmppc_set_vsr_word(vcpu, gpr); - else if (vcpu->arch.mmio_vsx_copy_type == + else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD_LOAD_DUMP) kvmppc_set_vsr_dword_dump(vcpu, gpr); - else if (vcpu->arch.mmio_vsx_copy_type == + else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_WORD_LOAD_DUMP) kvmppc_set_vsr_word_dump(vcpu, gpr); break; @@ -1260,7 +1260,7 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) u32 dword_offset, word_offset; union kvmppc_one_reg reg; int vsx_offset = 0; - int copy_type = vcpu->arch.mmio_vsx_copy_type; + int copy_type = vcpu->arch.mmio_copy_type; int result = 0; switch (copy_type) { From acc9eb9305fecd958e2877c4e6cd3284d01c2e82 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Mon, 21 May 2018 13:24:26 +0800 Subject: [PATCH 27/63] KVM: PPC: Reimplement LOAD_VMX/STORE_VMX instruction mmio emulation with analyse_instr() input This patch reimplements LOAD_VMX/STORE_VMX MMIO emulation with analyse_instr() input. When emulating the store, the VMX reg will need to be flushed so that the right reg val can be retrieved before writing to IO MEM. This patch also adds support for lvebx/lvehx/lvewx/stvebx/stvehx/stvewx MMIO emulation. To meet the requirement of handling different element sizes, kvmppc_handle_load128_by2x64()/kvmppc_handle_store128_by2x64() were replaced with kvmppc_handle_vmx_load()/kvmppc_handle_vmx_store(). The framework used is similar to VSX instruction MMIO emulation. Suggested-by: Paul Mackerras Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 10 +- arch/powerpc/kvm/emulate_loadstore.c | 124 +++++++++---- arch/powerpc/kvm/powerpc.c | 267 ++++++++++++++++++++++----- 4 files changed, 306 insertions(+), 96 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fe506c86404a..8dc5e439b387 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -691,6 +691,7 @@ struct kvm_vcpu_arch { u8 mmio_vsx_offset; u8 mmio_vsx_tx_sx_enabled; u8 mmio_vmx_copy_nums; + u8 mmio_vmx_offset; u8 mmio_copy_type; u8 osi_needed; u8 osi_enabled; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1f087c4cb386..e991821dd7fa 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -81,10 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend); -extern int kvmppc_handle_load128_by2x64(struct kvm_run *run, - struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian); -extern int kvmppc_handle_store128_by2x64(struct kvm_run *run, - struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian); +extern int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_default_endian); +extern int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rs, unsigned int bytes, int is_default_endian); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); @@ -265,6 +265,8 @@ union kvmppc_one_reg { vector128 vval; u64 vsxval[2]; u32 vsx32val[4]; + u16 vsx16val[8]; + u8 vsx8val[16]; struct { u64 addr; u64 length; diff --git a/arch/powerpc/kvm/emulate_loadstore.c 
b/arch/powerpc/kvm/emulate_loadstore.c index 324cfbfa6fa2..afde788be141 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -113,6 +113,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_sp64_extend = 0; vcpu->arch.mmio_sign_extend = 0; vcpu->arch.mmio_vmx_copy_nums = 0; + vcpu->arch.mmio_vmx_offset = 0; vcpu->arch.mmio_host_swabbed = 0; emulated = EMULATE_FAIL; @@ -158,6 +159,46 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) break; #endif +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + + /* Hardware enforces alignment of VMX accesses */ + vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1); + vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1); + + if (size == 16) { /* lvx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_DWORD; + } else if (size == 4) { /* lvewx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_WORD; + } else if (size == 2) { /* lvehx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_HWORD; + } else if (size == 1) { /* lvebx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_BYTE; + } else + break; + + vcpu->arch.mmio_vmx_offset = + (vcpu->arch.vaddr_accessed & 0xf)/size; + + if (size == 16) { + vcpu->arch.mmio_vmx_copy_nums = 2; + emulated = kvmppc_handle_vmx_load(run, + vcpu, KVM_MMIO_REG_VMX|op.reg, + 8, 1); + } else { + vcpu->arch.mmio_vmx_copy_nums = 1; + emulated = kvmppc_handle_vmx_load(run, vcpu, + KVM_MMIO_REG_VMX|op.reg, + size, 1); + } + break; +#endif #ifdef CONFIG_VSX case LOAD_VSX: { int io_size_each; @@ -241,6 +282,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) break; #endif +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + + /* Hardware enforces alignment of VMX accesses. 
*/ + vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1); + vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1); + + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, + MSR_VEC); + if (size == 16) { /* stvx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_DWORD; + } else if (size == 4) { /* stvewx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_WORD; + } else if (size == 2) { /* stvehx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_HWORD; + } else if (size == 1) { /* stvebx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_BYTE; + } else + break; + + vcpu->arch.mmio_vmx_offset = + (vcpu->arch.vaddr_accessed & 0xf)/size; + + if (size == 16) { + vcpu->arch.mmio_vmx_copy_nums = 2; + emulated = kvmppc_handle_vmx_store(run, + vcpu, op.reg, 8, 1); + } else { + vcpu->arch.mmio_vmx_copy_nums = 1; + emulated = kvmppc_handle_vmx_store(run, + vcpu, op.reg, size, 1); + } + + break; +#endif #ifdef CONFIG_VSX case STORE_VSX: { int io_size_each; @@ -298,47 +381,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } } - - if ((emulated == EMULATE_DONE) || (emulated == EMULATE_DO_MMIO)) - goto out; - - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { -#ifdef CONFIG_ALTIVEC - case OP_31_XOP_LVX: - if (kvmppc_check_altivec_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.vaddr_accessed &= ~0xFULL; - vcpu->arch.paddr_accessed &= ~0xFULL; - vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_load128_by2x64(run, vcpu, - KVM_MMIO_REG_VMX|rt, 1); - break; - - case OP_31_XOP_STVX: - if (kvmppc_check_altivec_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.vaddr_accessed &= ~0xFULL; - vcpu->arch.paddr_accessed &= ~0xFULL; - vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_store128_by2x64(run, vcpu, - rs, 1); - break; -#endif /* CONFIG_ALTIVEC */ - - default: - emulated = EMULATE_FAIL; - break; - } - break; - - default: - emulated = EMULATE_FAIL; - break; - } - -out: if (emulated == EMULATE_FAIL) { advance = 0; kvmppc_core_queue_program(vcpu, 0); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1580bd24bc74..05eccdc10fdd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -953,30 +953,110 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC -static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu, - u64 gpr) +static inline int kvmppc_get_vmx_offset_generic(struct kvm_vcpu *vcpu, + int index, int element_size) { + int offset; + int elts = sizeof(vector128)/element_size; + + if ((index < 0) || (index >= elts)) + return -1; + + if (kvmppc_need_byteswap(vcpu)) + offset = elts - index - 1; + else + offset = index; + + return offset; +} + +static inline int kvmppc_get_vmx_dword_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 8); +} + +static inline int kvmppc_get_vmx_word_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 4); +} + +static inline int kvmppc_get_vmx_hword_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 2); +} + +static inline int kvmppc_get_vmx_byte_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 1); +} + + +static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu, + u64 gpr) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_dword_offset(vcpu, + vcpu->arch.mmio_vmx_offset); int index = 
vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; - u32 hi, lo; - u32 di; -#ifdef __BIG_ENDIAN - hi = gpr >> 32; - lo = gpr & 0xffffffff; -#else - lo = gpr >> 32; - hi = gpr & 0xffffffff; -#endif - - di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */ - if (di > 1) + if (offset == -1) return; - if (vcpu->arch.mmio_host_swabbed) - di = 1 - di; + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsxval[offset] = gpr; + VCPU_VSX_VR(vcpu, index) = val.vval; +} - VCPU_VSX_VR(vcpu, index).u[di * 2] = hi; - VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo; +static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu, + u32 gpr32) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_word_offset(vcpu, + vcpu->arch.mmio_vmx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (offset == -1) + return; + + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsx32val[offset] = gpr32; + VCPU_VSX_VR(vcpu, index) = val.vval; +} + +static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu, + u16 gpr16) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_hword_offset(vcpu, + vcpu->arch.mmio_vmx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (offset == -1) + return; + + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsx16val[offset] = gpr16; + VCPU_VSX_VR(vcpu, index) = val.vval; +} + +static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu, + u8 gpr8) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_byte_offset(vcpu, + vcpu->arch.mmio_vmx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (offset == -1) + return; + + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsx8val[offset] = gpr8; + VCPU_VSX_VR(vcpu, index) = val.vval; } #endif /* CONFIG_ALTIVEC */ @@ -1097,7 +1177,16 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, if (vcpu->kvm->arch.kvm_ops->giveup_ext) vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC); - kvmppc_set_vmx_dword(vcpu, gpr); + if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_DWORD) + kvmppc_set_vmx_dword(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_WORD) + kvmppc_set_vmx_word(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == + KVMPPC_VMX_COPY_HWORD) + kvmppc_set_vmx_hword(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == + KVMPPC_VMX_COPY_BYTE) + kvmppc_set_vmx_byte(vcpu, gpr); break; #endif default: @@ -1376,14 +1465,16 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC -/* handle quadword load access in two halves */ -int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rt, int is_default_endian) +int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_default_endian) { enum emulation_result emulated = EMULATE_DONE; + if (vcpu->arch.mmio_vsx_copy_nums > 2) + return EMULATE_FAIL; + while (vcpu->arch.mmio_vmx_copy_nums) { - emulated = __kvmppc_handle_load(run, vcpu, rt, 8, + emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0); if (emulated != EMULATE_DONE) @@ -1391,55 +1482,127 @@ int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.paddr_accessed += run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; + vcpu->arch.mmio_vmx_offset++; } return emulated; } -static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val) +int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { - vector128 vrs = VCPU_VSX_VR(vcpu, rs); - u32 di; - u64 w0, 
w1; + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; - di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */ - if (di > 1) + vmx_offset = + kvmppc_get_vmx_dword_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) return -1; - if (kvmppc_need_byteswap(vcpu)) - di = 1 - di; + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsxval[vmx_offset]; - w0 = vrs.u[di * 2]; - w1 = vrs.u[di * 2 + 1]; - -#ifdef __BIG_ENDIAN - *val = (w0 << 32) | w1; -#else - *val = (w1 << 32) | w0; -#endif - return 0; + return result; } -/* handle quadword store in two halves */ -int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rs, int is_default_endian) +int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +{ + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; + + vmx_offset = + kvmppc_get_vmx_word_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) + return -1; + + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsx32val[vmx_offset]; + + return result; +} + +int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +{ + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; + + vmx_offset = + kvmppc_get_vmx_hword_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) + return -1; + + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsx16val[vmx_offset]; + + return result; +} + +int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +{ + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; + + vmx_offset = + kvmppc_get_vmx_byte_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) + return -1; + + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsx8val[vmx_offset]; + + return result; +} + +int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rs, unsigned int bytes, int is_default_endian) { u64 val = 0; + unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; + if (vcpu->arch.mmio_vsx_copy_nums > 2) + return EMULATE_FAIL; + vcpu->arch.io_gpr = rs; while (vcpu->arch.mmio_vmx_copy_nums) { - if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1) - return EMULATE_FAIL; + switch (vcpu->arch.mmio_copy_type) { + case KVMPPC_VMX_COPY_DWORD: + if (kvmppc_get_vmx_dword(vcpu, index, &val) == -1) + return EMULATE_FAIL; - emulated = kvmppc_handle_store(run, vcpu, val, 8, + break; + case KVMPPC_VMX_COPY_WORD: + if (kvmppc_get_vmx_word(vcpu, index, &val) == -1) + return EMULATE_FAIL; + break; + case KVMPPC_VMX_COPY_HWORD: + if (kvmppc_get_vmx_hword(vcpu, index, &val) == -1) + return EMULATE_FAIL; + break; + case KVMPPC_VMX_COPY_BYTE: + if (kvmppc_get_vmx_byte(vcpu, index, &val) == -1) + return EMULATE_FAIL; + break; + default: + return EMULATE_FAIL; + } + + emulated = kvmppc_handle_store(run, vcpu, val, bytes, is_default_endian); if (emulated != EMULATE_DONE) break; vcpu->arch.paddr_accessed += run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; + vcpu->arch.mmio_vmx_offset++; } return emulated; @@ -1454,11 +1617,11 @@ static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu, vcpu->arch.paddr_accessed += run->mmio.len; if (!vcpu->mmio_is_write) { - emulated = kvmppc_handle_load128_by2x64(run, vcpu, - vcpu->arch.io_gpr, 1); + emulated = kvmppc_handle_vmx_load(run, vcpu, + vcpu->arch.io_gpr, run->mmio.len, 1); } else { - emulated = kvmppc_handle_store128_by2x64(run, vcpu, - vcpu->arch.io_gpr, 1); + emulated = kvmppc_handle_vmx_store(run, vcpu, + 
vcpu->arch.io_gpr, run->mmio.len, 1); } switch (emulated) { @@ -1602,8 +1765,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) } #endif #ifdef CONFIG_ALTIVEC - if (vcpu->arch.mmio_vmx_copy_nums > 0) + if (vcpu->arch.mmio_vmx_copy_nums > 0) { vcpu->arch.mmio_vmx_copy_nums--; + vcpu->arch.mmio_vmx_offset++; + } if (vcpu->arch.mmio_vmx_copy_nums > 0) { r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run); From 9617a0b33569e79567300765d659c88c1f556c5d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 May 2018 15:47:17 +1000 Subject: [PATCH 28/63] KVM: PPC: Book3S PR: Allow KVM_PPC_CONFIGURE_V3_MMU to succeed Currently, PR KVM does not implement the configure_mmu operation, and so the KVM_PPC_CONFIGURE_V3_MMU ioctl always fails with an EINVAL error. This causes recent kernels to fail to boot as a PR KVM guest on POWER9, since recent kernels booted in HPT mode do the H_REGISTER_PROC_TBL hypercall, which causes userspace (QEMU) to do KVM_PPC_CONFIGURE_V3_MMU, which fails. This implements a minimal configure_mmu operation for PR KVM. It succeeds only if the MMU is being configured for HPT mode and no process table is being registered. This is enough to get recent kernels to boot as a PR KVM guest. Reviewed-by: Greg Kurz Tested-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c74a8885427d..b1aff9f83ed0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1687,6 +1687,17 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, return 0; } + +static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + /* Require flags and process table base and size to all be zero. */ + if (cfg->flags || cfg->process_table) + return -EINVAL; + return 0; +} + #else static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, struct kvm_ppc_smmu_info *info) @@ -1788,6 +1799,7 @@ static struct kvmppc_ops kvm_ops_pr = { .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, #ifdef CONFIG_PPC_BOOK3S_64 .hcall_implemented = kvmppc_hcall_impl_pr, + .configure_mmu = kvm_configure_mmu_pr, #endif .giveup_ext = kvmppc_giveup_ext, }; From 7b0e827c6970e8ca77c60ae87592204c39e41245 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 May 2018 20:07:52 +1000 Subject: [PATCH 29/63] KVM: PPC: Book3S HV: Factor fake-suspend handling out of kvmppc_save/restore_tm This splits out the handling of "fake suspend" mode, part of the hypervisor TM assist code for POWER9, and puts almost all of it in new kvmppc_save_tm_hv and kvmppc_restore_tm_hv functions. The new functions branch to kvmppc_save/restore_tm if the CPU does not require hypervisor TM assistance. With this, it will be more straightforward to move kvmppc_save_tm and kvmppc_restore_tm to another file and use them for transactional memory support in PR KVM. Additionally, it also makes the code a bit clearer and reduces the number of feature sections. 
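For orientation, the resulting save-side dispatch can be pictured roughly as below (an illustrative C sketch only; the real code in this patch is assembly, and the names here are invented stand-ins):

/*
 * Sketch of the new kvmppc_save_tm_hv structure: "hv_assist" stands in
 * for the CPU_FTR_P9_TM_HV_ASSIST feature test and "fake_suspend" for
 * the HSTATE_FAKE_SUSPEND flag.
 */
enum tm_save_path { TM_SAVE_FULL, TM_SAVE_FAKE_SUSPEND };

static enum tm_save_path save_tm_path_sketch(bool hv_assist, bool fake_suspend)
{
	if (!hv_assist || !fake_suspend)
		/* Ordinary path: branch to kvmppc_save_tm, which does the
		 * treclaim and saves the full checkpointed state. */
		return TM_SAVE_FULL;

	/*
	 * Fake-suspend path: treclaim only to get out of suspend state;
	 * the checkpointed state is already held, so only TFHAR/TFIAR are
	 * recorded, PSSCR[FAKE_SUSPEND] is cleared, and TEXASR keeps the
	 * value from the last exit in real suspend state.
	 */
	return TM_SAVE_FAKE_SUSPEND;
}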
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 221 +++++++++++++++--------- 1 file changed, 139 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5e6e493e065e..bfca999695f1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -795,7 +795,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - bl kvmppc_restore_tm + bl kvmppc_restore_tm_hv 91: #endif @@ -1779,7 +1779,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - bl kvmppc_save_tm + bl kvmppc_save_tm_hv 91: #endif @@ -2683,7 +2683,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ ld r9, HSTATE_KVM_VCPU(r13) - bl kvmppc_save_tm + bl kvmppc_save_tm_hv 91: #endif @@ -2801,7 +2801,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - bl kvmppc_restore_tm + bl kvmppc_restore_tm_hv 91: #endif @@ -3126,7 +3126,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * This can modify all checkpointed registers, but * restores r1, r2 and r9 (vcpu pointer) before exit. */ -kvmppc_save_tm: +kvmppc_save_tm_hv: + /* See if we need to handle fake suspend mode */ +BEGIN_FTR_SECTION + b kvmppc_save_tm +END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) + + lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ + cmpwi r0, 0 + beq kvmppc_save_tm + + /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -PPC_MIN_STKFRM(r1) @@ -3137,6 +3147,74 @@ kvmppc_save_tm: rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r8 + rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ + beq 4f +BEGIN_FTR_SECTION + bl pnv_power9_force_smt4_catch +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) + nop + + std r1, HSTATE_HOST_R1(r13) + + /* Clear the MSR RI since r1, r13 may be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* We have to treclaim here because that's the only way to do S->N */ + li r3, TM_CAUSE_KVM_RESCHED + TRECLAIM(R3) + + /* + * We were in fake suspend, so we are not going to save the + * register state as the guest checkpointed state (since + * we already have it), therefore we can now use any volatile GPR. + */ + /* Reload PACA pointer, stack pointer and TOC. */ + GET_PACA(r13) + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + + HMT_MEDIUM + ld r6, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r6 +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_release +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + +4: + mfspr r3, SPRN_PSSCR + /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */ + li r0, PSSCR_FAKE_SUSPEND + andc r3, r3, r0 + mtspr SPRN_PSSCR, r3 + + /* Don't save TEXASR, use value from last exit in real suspend state */ + ld r9, HSTATE_KVM_VCPU(r13) + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + + addi r1, r1, PPC_MIN_STKFRM + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr + +kvmppc_save_tm: + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM. */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + ld r5, VCPU_MSR(r9) rldicl. 
r5, r5, 64 - MSR_TS_S_LG, 62 beq 1f /* TM not active in guest. */ @@ -3145,21 +3223,9 @@ kvmppc_save_tm: li r3, TM_CAUSE_KVM_RESCHED BEGIN_FTR_SECTION - lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ - cmpwi r0, 0 - beq 3f - rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ - beq 4f -BEGIN_FTR_SECTION_NESTED(96) - bl pnv_power9_force_smt4_catch -END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) - nop - b 6f -3: /* Emulation of the treclaim instruction needs TEXASR before treclaim */ mfspr r6, SPRN_TEXASR std r6, VCPU_ORIG_TEXASR(r9) -6: END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) /* Clear the MSR RI since r1, r13 are all going to be foobar. */ @@ -3173,43 +3239,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) SET_SCRATCH0(r13) GET_PACA(r13) std r9, PACATMSCRATCH(r13) - - /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */ -BEGIN_FTR_SECTION - lbz r9, HSTATE_FAKE_SUSPEND(r13) - cmpwi r9, 0 - beq 2f - /* - * We were in fake suspend, so we are not going to save the - * register state as the guest checkpointed state (since - * we already have it), therefore we can now use any volatile GPR. - */ - /* Reload stack pointer and TOC. */ - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - /* Set MSR RI now we have r1 and r13 back. */ - li r5, MSR_RI - mtmsrd r5, 1 - HMT_MEDIUM - ld r6, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r6 -BEGIN_FTR_SECTION_NESTED(96) - bl pnv_power9_force_smt4_release -END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) - nop - -4: - mfspr r3, SPRN_PSSCR - /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */ - li r0, PSSCR_FAKE_SUSPEND - andc r3, r3, r0 - mtspr SPRN_PSSCR, r3 - ld r9, HSTATE_KVM_VCPU(r13) - /* Don't save TEXASR, use value from last exit in real suspend state */ - b 11f -2: -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) - ld r9, HSTATE_KVM_VCPU(r13) /* Get a few more GPRs free. */ @@ -3288,7 +3317,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - addi r1, r1, PPC_MIN_STKFRM ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr @@ -3299,6 +3327,61 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) * This potentially modifies all checkpointed registers. * It restores r1, r2, r4 from the PACA. */ +kvmppc_restore_tm_hv: + /* + * If we are doing TM emulation for the guest on a POWER9 DD2, + * then we don't actually do a trechkpt -- we either set up + * fake-suspend mode, or emulate a TM rollback. + */ +BEGIN_FTR_SECTION + b kvmppc_restore_tm +END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + li r0, 0 + stb r0, HSTATE_FAKE_SUSPEND(r13) + + /* Turn on TM so we can restore TM SPRs */ + mfmsr r5 + li r0, 1 + rldimi r5, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r5 + + /* + * The user may change these outside of a transaction, so they must + * always be context switched. + */ + ld r5, VCPU_TFHAR(r4) + ld r6, VCPU_TFIAR(r4) + ld r7, VCPU_TEXASR(r4) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + + ld r5, VCPU_MSR(r4) + rldicl. 
r5, r5, 64 - MSR_TS_S_LG, 62 + beqlr /* TM not active in guest */ + + /* Make sure the failure summary is set */ + oris r7, r7, (TEXASR_FS)@h + mtspr SPRN_TEXASR, r7 + + cmpwi r5, 1 /* check for suspended state */ + bgt 10f + stb r5, HSTATE_FAKE_SUSPEND(r13) + b 9f /* and return */ +10: stdu r1, -PPC_MIN_STKFRM(r1) + /* guest is in transactional state, so simulate rollback */ + mr r3, r4 + bl kvmhv_emulate_tm_rollback + nop + ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ + addi r1, r1, PPC_MIN_STKFRM +9: ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr + kvmppc_restore_tm: mflr r0 std r0, PPC_LR_STKOFF(r1) @@ -3323,8 +3406,6 @@ kvmppc_restore_tm: mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 - li r0, 0 - stb r0, HSTATE_FAKE_SUSPEND(r13) ld r5, VCPU_MSR(r4) rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ @@ -3338,15 +3419,6 @@ kvmppc_restore_tm: oris r7, r7, (TEXASR_FS)@h mtspr SPRN_TEXASR, r7 - /* - * If we are doing TM emulation for the guest on a POWER9 DD2, - * then we don't actually do a trechkpt -- we either set up - * fake-suspend mode, or emulate a TM rollback. - */ -BEGIN_FTR_SECTION - b .Ldo_tm_fake_load -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) - /* * We need to load up the checkpointed state for the guest. * We need to do this early as it will blow away any GPRs, VSRs and @@ -3419,25 +3491,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) /* Set the MSR RI since we have our registers back. */ li r5, MSR_RI mtmsrd r5, 1 -9: ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr - -.Ldo_tm_fake_load: - cmpwi r5, 1 /* check for suspended state */ - bgt 10f - stb r5, HSTATE_FAKE_SUSPEND(r13) - b 9b /* and return */ -10: stdu r1, -PPC_MIN_STKFRM(r1) - /* guest is in transactional state, so simulate rollback */ - mr r3, r4 - bl kvmhv_emulate_tm_rollback - nop - ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ - addi r1, r1, PPC_MIN_STKFRM - b 9b -#endif +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ /* * We come here if we get any exception or interrupt while we are From 009c872a8bc4d38f487a9bd62423d019e4322517 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:47 +0800 Subject: [PATCH 30/63] KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate file It is a simple patch just for moving kvmppc_save_tm/kvmppc_restore_tm() functionalities to tm.S. There is no logic change. The reconstruct of those APIs will be done in later patches to improve readability. It is for preparation of reusing those APIs on both HV/PR PPC KVM. Some slight change during move the functions includes: - surrounds some HV KVM specific code with CONFIG_KVM_BOOK3S_HV_POSSIBLE for compilation. 
- use _GLOBAL() to define kvmppc_save_tm/kvmppc_restore_tm() [paulus@ozlabs.org - rebased on top of 7b0e827c6970 ("KVM: PPC: Book3S HV: Factor fake-suspend handling out of kvmppc_save/restore_tm", 2018-05-30)] Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/Makefile | 3 + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 231 -------------------- arch/powerpc/kvm/tm.S | 279 ++++++++++++++++++++++++ 3 files changed, 282 insertions(+), 231 deletions(-) create mode 100644 arch/powerpc/kvm/tm.S diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4b19da8c87ae..f872c04bb5b1 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -63,6 +63,9 @@ kvm-pr-y := \ book3s_64_mmu.o \ book3s_32_mmu.o +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + tm.o + ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_rmhandlers.o diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bfca999695f1..8e016598692e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -39,8 +39,6 @@ BEGIN_FTR_SECTION; \ extsw reg, reg; \ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) -#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -3205,122 +3203,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) mtlr r0 blr -kvmppc_save_tm: - mflr r0 - std r0, PPC_LR_STKOFF(r1) - - /* Turn on TM. */ - mfmsr r8 - li r0, 1 - rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r8 - - ld r5, VCPU_MSR(r9) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 - beq 1f /* TM not active in guest. */ - - std r1, HSTATE_HOST_R1(r13) - li r3, TM_CAUSE_KVM_RESCHED - -BEGIN_FTR_SECTION - /* Emulation of the treclaim instruction needs TEXASR before treclaim */ - mfspr r6, SPRN_TEXASR - std r6, VCPU_ORIG_TEXASR(r9) -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) - - /* Clear the MSR RI since r1, r13 are all going to be foobar. */ - li r5, 0 - mtmsrd r5, 1 - - /* All GPRs are volatile at this point. */ - TRECLAIM(R3) - - /* Temporarily store r13 and r9 so we have some regs to play with */ - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9, PACATMSCRATCH(r13) - ld r9, HSTATE_KVM_VCPU(r13) - - /* Get a few more GPRs free. */ - std r29, VCPU_GPRS_TM(29)(r9) - std r30, VCPU_GPRS_TM(30)(r9) - std r31, VCPU_GPRS_TM(31)(r9) - - /* Save away PPR and DSCR soon so don't run with user values. */ - mfspr r31, SPRN_PPR - HMT_MEDIUM - mfspr r30, SPRN_DSCR - ld r29, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r29 - - /* Save all but r9, r13 & r29-r31 */ - reg = 0 - .rept 29 - .if (reg != 9) && (reg != 13) - std reg, VCPU_GPRS_TM(reg)(r9) - .endif - reg = reg + 1 - .endr - /* ... now save r13 */ - GET_SCRATCH0(r4) - std r4, VCPU_GPRS_TM(13)(r9) - /* ... and save r9 */ - ld r4, PACATMSCRATCH(r13) - std r4, VCPU_GPRS_TM(9)(r9) - - /* Reload stack pointer and TOC. */ - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - - /* Set MSR RI now we have r1 and r13 back. */ - li r5, MSR_RI - mtmsrd r5, 1 - - /* Save away checkpinted SPRs. */ - std r31, VCPU_PPR_TM(r9) - std r30, VCPU_DSCR_TM(r9) - mflr r5 - mfcr r6 - mfctr r7 - mfspr r8, SPRN_AMR - mfspr r10, SPRN_TAR - mfxer r11 - std r5, VCPU_LR_TM(r9) - stw r6, VCPU_CR_TM(r9) - std r7, VCPU_CTR_TM(r9) - std r8, VCPU_AMR_TM(r9) - std r10, VCPU_TAR_TM(r9) - std r11, VCPU_XER_TM(r9) - - /* Restore r12 as trap number. 
*/ - lwz r12, VCPU_TRAP(r9) - - /* Save FP/VSX. */ - addi r3, r9, VCPU_FPRS_TM - bl store_fp_state - addi r3, r9, VCPU_VRS_TM - bl store_vr_state - mfspr r6, SPRN_VRSAVE - stw r6, VCPU_VRSAVE_TM(r9) -1: - /* - * We need to save these SPRs after the treclaim so that the software - * error code is recorded correctly in the TEXASR. Also the user may - * change these outside of a transaction, so they must always be - * context switched. - */ - mfspr r7, SPRN_TEXASR - std r7, VCPU_TEXASR(r9) -11: - mfspr r5, SPRN_TFHAR - mfspr r6, SPRN_TFIAR - std r5, VCPU_TFHAR(r9) - std r6, VCPU_TFIAR(r9) - - ld r0, PPC_LR_STKOFF(r1) - mtlr r0 - blr - /* * Restore transactional state and TM-related registers. * Called with r4 pointing to the vcpu struct. @@ -3381,119 +3263,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) 9: ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr - -kvmppc_restore_tm: - mflr r0 - std r0, PPC_LR_STKOFF(r1) - - /* Turn on TM/FP/VSX/VMX so we can restore them. */ - mfmsr r5 - li r6, MSR_TM >> 32 - sldi r6, r6, 32 - or r5, r5, r6 - ori r5, r5, MSR_FP - oris r5, r5, (MSR_VEC | MSR_VSX)@h - mtmsrd r5 - - /* - * The user may change these outside of a transaction, so they must - * always be context switched. - */ - ld r5, VCPU_TFHAR(r4) - ld r6, VCPU_TFIAR(r4) - ld r7, VCPU_TEXASR(r4) - mtspr SPRN_TFHAR, r5 - mtspr SPRN_TFIAR, r6 - mtspr SPRN_TEXASR, r7 - - ld r5, VCPU_MSR(r4) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 - beqlr /* TM not active in guest */ - std r1, HSTATE_HOST_R1(r13) - - /* Make sure the failure summary is set, otherwise we'll program check - * when we trechkpt. It's possible that this might have been not set - * on a kvmppc_set_one_reg() call but we shouldn't let this crash the - * host. - */ - oris r7, r7, (TEXASR_FS)@h - mtspr SPRN_TEXASR, r7 - - /* - * We need to load up the checkpointed state for the guest. - * We need to do this early as it will blow away any GPRs, VSRs and - * some SPRs. - */ - - mr r31, r4 - addi r3, r31, VCPU_FPRS_TM - bl load_fp_state - addi r3, r31, VCPU_VRS_TM - bl load_vr_state - mr r4, r31 - lwz r7, VCPU_VRSAVE_TM(r4) - mtspr SPRN_VRSAVE, r7 - - ld r5, VCPU_LR_TM(r4) - lwz r6, VCPU_CR_TM(r4) - ld r7, VCPU_CTR_TM(r4) - ld r8, VCPU_AMR_TM(r4) - ld r9, VCPU_TAR_TM(r4) - ld r10, VCPU_XER_TM(r4) - mtlr r5 - mtcr r6 - mtctr r7 - mtspr SPRN_AMR, r8 - mtspr SPRN_TAR, r9 - mtxer r10 - - /* - * Load up PPR and DSCR values but don't put them in the actual SPRs - * till the last moment to avoid running with userspace PPR and DSCR for - * too long. - */ - ld r29, VCPU_DSCR_TM(r4) - ld r30, VCPU_PPR_TM(r4) - - std r2, PACATMSCRATCH(r13) /* Save TOC */ - - /* Clear the MSR RI since r1, r13 are all going to be foobar. */ - li r5, 0 - mtmsrd r5, 1 - - /* Load GPRs r0-r28 */ - reg = 0 - .rept 29 - ld reg, VCPU_GPRS_TM(reg)(r31) - reg = reg + 1 - .endr - - mtspr SPRN_DSCR, r29 - mtspr SPRN_PPR, r30 - - /* Load final GPRs */ - ld 29, VCPU_GPRS_TM(29)(r31) - ld 30, VCPU_GPRS_TM(30)(r31) - ld 31, VCPU_GPRS_TM(31)(r31) - - /* TM checkpointed state is now setup. All GPRs are now volatile. */ - TRECHKPT - - /* Now let's get back the state we need. */ - HMT_MEDIUM - GET_PACA(r13) - ld r29, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r29 - ld r4, HSTATE_KVM_VCPU(r13) - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATMSCRATCH(r13) - - /* Set the MSR RI since we have our registers back. 
*/ - li r5, MSR_RI - mtmsrd r5, 1 - ld r0, PPC_LR_STKOFF(r1) - mtlr r0 - blr #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ /* diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S new file mode 100644 index 000000000000..ba97789c41ca --- /dev/null +++ b/arch/powerpc/kvm/tm.S @@ -0,0 +1,279 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Derived from book3s_hv_rmhandlers.S, which is: + * + * Copyright 2011 Paul Mackerras, IBM Corp. + * + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) + +/* + * Save transactional state and TM-related registers. + * Called with r9 pointing to the vcpu struct. + * This can modify all checkpointed registers, but + * restores r1, r2 and r9 (vcpu pointer) before exit. + */ +_GLOBAL(kvmppc_save_tm) + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM. */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r5, VCPU_MSR(r9) + rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + beq 1f /* TM not active in guest. */ +#endif + + std r1, HSTATE_HOST_R1(r13) + li r3, TM_CAUSE_KVM_RESCHED + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +BEGIN_FTR_SECTION + /* Emulation of the treclaim instruction needs TEXASR before treclaim */ + mfspr r6, SPRN_TEXASR + std r6, VCPU_ORIG_TEXASR(r9) +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) +#endif + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* All GPRs are volatile at this point. */ + TRECLAIM(R3) + + /* Temporarily store r13 and r9 so we have some regs to play with */ + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9, PACATMSCRATCH(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r9, HSTATE_KVM_VCPU(r13) +#endif + + /* Get a few more GPRs free. */ + std r29, VCPU_GPRS_TM(29)(r9) + std r30, VCPU_GPRS_TM(30)(r9) + std r31, VCPU_GPRS_TM(31)(r9) + + /* Save away PPR and DSCR soon so don't run with user values. */ + mfspr r31, SPRN_PPR + HMT_MEDIUM + mfspr r30, SPRN_DSCR +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 +#endif + + /* Save all but r9, r13 & r29-r31 */ + reg = 0 + .rept 29 + .if (reg != 9) && (reg != 13) + std reg, VCPU_GPRS_TM(reg)(r9) + .endif + reg = reg + 1 + .endr + /* ... now save r13 */ + GET_SCRATCH0(r4) + std r4, VCPU_GPRS_TM(13)(r9) + /* ... and save r9 */ + ld r4, PACATMSCRATCH(r13) + std r4, VCPU_GPRS_TM(9)(r9) + + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + + /* Save away checkpinted SPRs. */ + std r31, VCPU_PPR_TM(r9) + std r30, VCPU_DSCR_TM(r9) + mflr r5 + mfcr r6 + mfctr r7 + mfspr r8, SPRN_AMR + mfspr r10, SPRN_TAR + mfxer r11 + std r5, VCPU_LR_TM(r9) + stw r6, VCPU_CR_TM(r9) + std r7, VCPU_CTR_TM(r9) + std r8, VCPU_AMR_TM(r9) + std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) + + /* Restore r12 as trap number. */ + lwz r12, VCPU_TRAP(r9) + + /* Save FP/VSX. 
*/ + addi r3, r9, VCPU_FPRS_TM + bl store_fp_state + addi r3, r9, VCPU_VRS_TM + bl store_vr_state + mfspr r6, SPRN_VRSAVE + stw r6, VCPU_VRSAVE_TM(r9) +1: + /* + * We need to save these SPRs after the treclaim so that the software + * error code is recorded correctly in the TEXASR. Also the user may + * change these outside of a transaction, so they must always be + * context switched. + */ + mfspr r7, SPRN_TEXASR + std r7, VCPU_TEXASR(r9) +11: + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr + +/* + * Restore transactional state and TM-related registers. + * Called with r4 pointing to the vcpu struct. + * This potentially modifies all checkpointed registers. + * It restores r1, r2, r4 from the PACA. + */ +_GLOBAL(kvmppc_restore_tm) + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM/FP/VSX/VMX so we can restore them. */ + mfmsr r5 + li r6, MSR_TM >> 32 + sldi r6, r6, 32 + or r5, r5, r6 + ori r5, r5, MSR_FP + oris r5, r5, (MSR_VEC | MSR_VSX)@h + mtmsrd r5 + + /* + * The user may change these outside of a transaction, so they must + * always be context switched. + */ + ld r5, VCPU_TFHAR(r4) + ld r6, VCPU_TFIAR(r4) + ld r7, VCPU_TEXASR(r4) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r5, VCPU_MSR(r4) + rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + beqlr /* TM not active in guest */ +#endif + std r1, HSTATE_HOST_R1(r13) + + /* Make sure the failure summary is set, otherwise we'll program check + * when we trechkpt. It's possible that this might have been not set + * on a kvmppc_set_one_reg() call but we shouldn't let this crash the + * host. + */ + oris r7, r7, (TEXASR_FS)@h + mtspr SPRN_TEXASR, r7 + + /* + * We need to load up the checkpointed state for the guest. + * We need to do this early as it will blow away any GPRs, VSRs and + * some SPRs. + */ + + mr r31, r4 + addi r3, r31, VCPU_FPRS_TM + bl load_fp_state + addi r3, r31, VCPU_VRS_TM + bl load_vr_state + mr r4, r31 + lwz r7, VCPU_VRSAVE_TM(r4) + mtspr SPRN_VRSAVE, r7 + + ld r5, VCPU_LR_TM(r4) + lwz r6, VCPU_CR_TM(r4) + ld r7, VCPU_CTR_TM(r4) + ld r8, VCPU_AMR_TM(r4) + ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) + mtlr r5 + mtcr r6 + mtctr r7 + mtspr SPRN_AMR, r8 + mtspr SPRN_TAR, r9 + mtxer r10 + + /* + * Load up PPR and DSCR values but don't put them in the actual SPRs + * till the last moment to avoid running with userspace PPR and DSCR for + * too long. + */ + ld r29, VCPU_DSCR_TM(r4) + ld r30, VCPU_PPR_TM(r4) + + std r2, PACATMSCRATCH(r13) /* Save TOC */ + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* Load GPRs r0-r28 */ + reg = 0 + .rept 29 + ld reg, VCPU_GPRS_TM(reg)(r31) + reg = reg + 1 + .endr + + mtspr SPRN_DSCR, r29 + mtspr SPRN_PPR, r30 + + /* Load final GPRs */ + ld 29, VCPU_GPRS_TM(29)(r31) + ld 30, VCPU_GPRS_TM(30)(r31) + ld 31, VCPU_GPRS_TM(31)(r31) + + /* TM checkpointed state is now setup. All GPRs are now volatile. */ + TRECHKPT + + /* Now let's get back the state we need. */ + HMT_MEDIUM + GET_PACA(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 + ld r4, HSTATE_KVM_VCPU(r13) +#endif + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATMSCRATCH(r13) + + /* Set the MSR RI since we have our registers back. 
*/ + li r5, MSR_RI + mtmsrd r5, 1 + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ From 6f597c6b63b6f3675914b5ec8fcd008a58678650 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:48 +0800 Subject: [PATCH 31/63] KVM: PPC: Book3S PR: Add guest MSR parameter for kvmppc_save_tm()/kvmppc_restore_tm() HV KVM and PR KVM need different MSR source to indicate whether treclaim. or trecheckpoint. is necessary. This patch add new parameter (guest MSR) for these kvmppc_save_tm/ kvmppc_restore_tm() APIs: - For HV KVM, it is VCPU_MSR - For PR KVM, it is current host MSR or VCPU_SHADOW_SRR1 This enhancement enables these 2 APIs to be reused by PR KVM later. And the patch keeps HV KVM logic unchanged. This patch also reworks kvmppc_save_tm()/kvmppc_restore_tm() to have a clean ABI: r3 for vcpu and r4 for guest_msr. During kvmppc_save_tm/kvmppc_restore_tm(), the R1 need to be saved or restored. Currently the R1 is saved into HSTATE_HOST_R1. In PR KVM, we are going to add a C function wrapper for kvmppc_save_tm/kvmppc_restore_tm() where the R1 will be incremented with added stackframe and save into HSTATE_HOST_R1. There are several places in HV KVM to load HSTATE_HOST_R1 as R1, and we don't want to bring risk or confusion by TM code. This patch will use HSTATE_SCRATCH2 to save/restore R1 in kvmppc_save_tm/kvmppc_restore_tm() to avoid future confusion, since the r1 is actually a temporary/scratch value to be saved/stored. [paulus@ozlabs.org - rebased on top of 7b0e827c6970 ("KVM: PPC: Book3S HV: Factor fake-suspend handling out of kvmppc_save/restore_tm", 2018-05-30)] Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 33 +++++++----- arch/powerpc/kvm/tm.S | 71 +++++++++++++------------ 2 files changed, 57 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 8e016598692e..75e3bbf8c957 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -793,7 +793,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ + mr r3, r4 + ld r4, VCPU_MSR(r3) bl kvmppc_restore_tm_hv + ld r4, HSTATE_KVM_VCPU(r13) 91: #endif @@ -1777,7 +1780,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ + mr r3, r9 + ld r4, VCPU_MSR(r3) bl kvmppc_save_tm_hv + ld r9, HSTATE_KVM_VCPU(r13) 91: #endif @@ -2680,7 +2686,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - ld r9, HSTATE_KVM_VCPU(r13) + ld r3, HSTATE_KVM_VCPU(r13) + ld r4, VCPU_MSR(r3) bl kvmppc_save_tm_hv 91: #endif @@ -2799,7 +2806,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ + mr r3, r4 + ld r4, VCPU_MSR(r3) bl kvmppc_restore_tm_hv + ld r4, HSTATE_KVM_VCPU(r13) 91: #endif @@ -3120,9 +3130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Save transactional state and TM-related registers. - * Called with r9 pointing to the vcpu struct. + * Called with r3 pointing to the vcpu struct and r4 containing + * the guest MSR value. * This can modify all checkpointed registers, but - * restores r1, r2 and r9 (vcpu pointer) before exit. + * restores r1 and r2 before exit. 
*/ kvmppc_save_tm_hv: /* See if we need to handle fake suspend mode */ @@ -3205,9 +3216,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) /* * Restore transactional state and TM-related registers. - * Called with r4 pointing to the vcpu struct. + * Called with r3 pointing to the vcpu struct + * and r4 containing the guest MSR value. * This potentially modifies all checkpointed registers. - * It restores r1, r2, r4 from the PACA. + * It restores r1 and r2 from the PACA. */ kvmppc_restore_tm_hv: /* @@ -3234,15 +3246,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) * The user may change these outside of a transaction, so they must * always be context switched. */ - ld r5, VCPU_TFHAR(r4) - ld r6, VCPU_TFIAR(r4) - ld r7, VCPU_TEXASR(r4) + ld r5, VCPU_TFHAR(r3) + ld r6, VCPU_TFIAR(r3) + ld r7, VCPU_TEXASR(r3) mtspr SPRN_TFHAR, r5 mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 - ld r5, VCPU_MSR(r4) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + rldicl. r5, r4, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ /* Make sure the failure summary is set */ @@ -3255,10 +3266,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) b 9f /* and return */ 10: stdu r1, -PPC_MIN_STKFRM(r1) /* guest is in transactional state, so simulate rollback */ - mr r3, r4 bl kvmhv_emulate_tm_rollback nop - ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ addi r1, r1, PPC_MIN_STKFRM 9: ld r0, PPC_LR_STKOFF(r1) mtlr r0 diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index ba97789c41ca..f027b5a0c0f0 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -26,9 +26,12 @@ /* * Save transactional state and TM-related registers. - * Called with r9 pointing to the vcpu struct. + * Called with: + * - r3 pointing to the vcpu struct + * - r4 points to the MSR with current TS bits: + * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR). * This can modify all checkpointed registers, but - * restores r1, r2 and r9 (vcpu pointer) before exit. + * restores r1, r2 before exit. */ _GLOBAL(kvmppc_save_tm) mflr r0 @@ -40,20 +43,17 @@ _GLOBAL(kvmppc_save_tm) rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r8 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - ld r5, VCPU_MSR(r9) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + rldicl. r4, r4, 64 - MSR_TS_S_LG, 62 beq 1f /* TM not active in guest. */ -#endif - std r1, HSTATE_HOST_R1(r13) - li r3, TM_CAUSE_KVM_RESCHED + std r1, HSTATE_SCRATCH2(r13) + std r3, HSTATE_SCRATCH1(r13) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE BEGIN_FTR_SECTION /* Emulation of the treclaim instruction needs TEXASR before treclaim */ mfspr r6, SPRN_TEXASR - std r6, VCPU_ORIG_TEXASR(r9) + std r6, VCPU_ORIG_TEXASR(r3) END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) #endif @@ -61,6 +61,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) li r5, 0 mtmsrd r5, 1 + li r3, TM_CAUSE_KVM_RESCHED + /* All GPRs are volatile at this point. */ TRECLAIM(R3) @@ -68,9 +70,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) SET_SCRATCH0(r13) GET_PACA(r13) std r9, PACATMSCRATCH(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - ld r9, HSTATE_KVM_VCPU(r13) -#endif + ld r9, HSTATE_SCRATCH1(r13) /* Get a few more GPRs free. */ std r29, VCPU_GPRS_TM(29)(r9) @@ -102,7 +102,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) std r4, VCPU_GPRS_TM(9)(r9) /* Reload stack pointer and TOC. */ - ld r1, HSTATE_HOST_R1(r13) + ld r1, HSTATE_SCRATCH2(r13) ld r2, PACATOC(r13) /* Set MSR RI now we have r1 and r13 back. 
*/ @@ -156,9 +156,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) /* * Restore transactional state and TM-related registers. - * Called with r4 pointing to the vcpu struct. + * Called with: + * - r3 pointing to the vcpu struct. + * - r4 is the guest MSR with desired TS bits: + * For HV KVM, it is VCPU_MSR + * For PR KVM, it is provided by caller * This potentially modifies all checkpointed registers. - * It restores r1, r2, r4 from the PACA. + * It restores r1, r2 from the PACA. */ _GLOBAL(kvmppc_restore_tm) mflr r0 @@ -177,19 +181,17 @@ _GLOBAL(kvmppc_restore_tm) * The user may change these outside of a transaction, so they must * always be context switched. */ - ld r5, VCPU_TFHAR(r4) - ld r6, VCPU_TFIAR(r4) - ld r7, VCPU_TEXASR(r4) + ld r5, VCPU_TFHAR(r3) + ld r6, VCPU_TFIAR(r3) + ld r7, VCPU_TEXASR(r3) mtspr SPRN_TFHAR, r5 mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - ld r5, VCPU_MSR(r4) + mr r5, r4 rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ -#endif - std r1, HSTATE_HOST_R1(r13) + std r1, HSTATE_SCRATCH2(r13) /* Make sure the failure summary is set, otherwise we'll program check * when we trechkpt. It's possible that this might have been not set @@ -205,21 +207,21 @@ _GLOBAL(kvmppc_restore_tm) * some SPRs. */ - mr r31, r4 + mr r31, r3 addi r3, r31, VCPU_FPRS_TM bl load_fp_state addi r3, r31, VCPU_VRS_TM bl load_vr_state - mr r4, r31 - lwz r7, VCPU_VRSAVE_TM(r4) + mr r3, r31 + lwz r7, VCPU_VRSAVE_TM(r3) mtspr SPRN_VRSAVE, r7 - ld r5, VCPU_LR_TM(r4) - lwz r6, VCPU_CR_TM(r4) - ld r7, VCPU_CTR_TM(r4) - ld r8, VCPU_AMR_TM(r4) - ld r9, VCPU_TAR_TM(r4) - ld r10, VCPU_XER_TM(r4) + ld r5, VCPU_LR_TM(r3) + lwz r6, VCPU_CR_TM(r3) + ld r7, VCPU_CTR_TM(r3) + ld r8, VCPU_AMR_TM(r3) + ld r9, VCPU_TAR_TM(r3) + ld r10, VCPU_XER_TM(r3) mtlr r5 mtcr r6 mtctr r7 @@ -232,8 +234,8 @@ _GLOBAL(kvmppc_restore_tm) * till the last moment to avoid running with userspace PPR and DSCR for * too long. */ - ld r29, VCPU_DSCR_TM(r4) - ld r30, VCPU_PPR_TM(r4) + ld r29, VCPU_DSCR_TM(r3) + ld r30, VCPU_PPR_TM(r3) std r2, PACATMSCRATCH(r13) /* Save TOC */ @@ -265,9 +267,8 @@ _GLOBAL(kvmppc_restore_tm) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE ld r29, HSTATE_DSCR(r13) mtspr SPRN_DSCR, r29 - ld r4, HSTATE_KVM_VCPU(r13) #endif - ld r1, HSTATE_HOST_R1(r13) + ld r1, HSTATE_SCRATCH2(r13) ld r2, PACATMSCRATCH(r13) /* Set the MSR RI since we have our registers back. */ From 7f386af7bdb1a45bb04fb02d7b751809d63e5b09 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:49 +0800 Subject: [PATCH 32/63] KVM: PPC: Book3S PR: Turn on FP/VSX/VMX MSR bits in kvmppc_save_tm() kvmppc_save_tm() invokes store_fp_state/store_vr_state(). So it is mandatory to turn on FP/VSX/VMX MSR bits for its execution, just like what kvmppc_restore_tm() did. Previously HV KVM has turned the bits on outside of function kvmppc_save_tm(). Now we include this bit change in kvmppc_save_tm() so that the logic is cleaner. And PR KVM can reuse it later. Signed-off-by: Simon Guo Reviewed-by: Paul Mackerras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/tm.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index f027b5a0c0f0..2760d7acd371 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -41,6 +41,8 @@ _GLOBAL(kvmppc_save_tm) mfmsr r8 li r0, 1 rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + ori r8, r8, MSR_FP + oris r8, r8, (MSR_VEC | MSR_VSX)@h mtmsrd r8 rldicl. 
r4, r4, 64 - MSR_TS_S_LG, 62 From caa3be92bebc5b87a221900ac408aa99b0badf3d Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:50 +0800 Subject: [PATCH 33/63] KVM: PPC: Book3S PR: Add C function wrapper for _kvmppc_save/restore_tm() Currently __kvmppc_save/restore_tm() APIs can only be invoked from assembly function. This patch adds C function wrappers for them so that they can be safely called from C function. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/asm-prototypes.h | 6 ++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +- arch/powerpc/kvm/tm.S | 94 ++++++++++++++++++++++- 3 files changed, 101 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index dfdcb2374c28..5da683bebc7f 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -141,7 +141,13 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); void pnv_power9_force_smt4_catch(void); void pnv_power9_force_smt4_release(void); +/* Transaction memory related */ void tm_enable(void); void tm_disable(void); void tm_abort(uint8_t cause); + +struct kvm_vcpu; +void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); +void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 75e3bbf8c957..af631d8303f6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -3138,12 +3138,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) kvmppc_save_tm_hv: /* See if we need to handle fake suspend mode */ BEGIN_FTR_SECTION - b kvmppc_save_tm + b __kvmppc_save_tm END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ cmpwi r0, 0 - beq kvmppc_save_tm + beq __kvmppc_save_tm /* The following code handles the fake_suspend = 1 case */ mflr r0 @@ -3228,7 +3228,7 @@ kvmppc_restore_tm_hv: * fake-suspend mode, or emulate a TM rollback. */ BEGIN_FTR_SECTION - b kvmppc_restore_tm + b __kvmppc_restore_tm END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) mflr r0 std r0, PPC_LR_STKOFF(r1) diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index 2760d7acd371..4a68dd4050a4 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -33,7 +33,7 @@ * This can modify all checkpointed registers, but * restores r1, r2 before exit. */ -_GLOBAL(kvmppc_save_tm) +_GLOBAL(__kvmppc_save_tm) mflr r0 std r0, PPC_LR_STKOFF(r1) @@ -156,6 +156,52 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) mtlr r0 blr +/* + * _kvmppc_save_tm_pr() is a wrapper around __kvmppc_save_tm(), so that it can + * be invoked from C function by PR KVM only. + */ +_GLOBAL(_kvmppc_save_tm_pr) + mflr r5 + std r5, PPC_LR_STKOFF(r1) + stdu r1, -SWITCH_FRAME_SIZE(r1) + SAVE_NVGPRS(r1) + + /* save MSR since TM/math bits might be impacted + * by __kvmppc_save_tm(). 
+ */ + mfmsr r5 + SAVE_GPR(5, r1) + + /* also save DSCR/CR so that it can be recovered later */ + mfspr r6, SPRN_DSCR + SAVE_GPR(6, r1) + + mfcr r7 + stw r7, _CCR(r1) + + bl __kvmppc_save_tm + + ld r7, _CCR(r1) + mtcr r7 + + REST_GPR(6, r1) + mtspr SPRN_DSCR, r6 + + /* need preserve current MSR's MSR_TS bits */ + REST_GPR(5, r1) + mfmsr r6 + rldicl r6, r6, 64 - MSR_TS_S_LG, 62 + rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG + mtmsrd r5 + + REST_NVGPRS(r1) + addi r1, r1, SWITCH_FRAME_SIZE + ld r5, PPC_LR_STKOFF(r1) + mtlr r5 + blr + +EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr); + /* * Restore transactional state and TM-related registers. * Called with: @@ -166,7 +212,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) * This potentially modifies all checkpointed registers. * It restores r1, r2 from the PACA. */ -_GLOBAL(kvmppc_restore_tm) +_GLOBAL(__kvmppc_restore_tm) mflr r0 std r0, PPC_LR_STKOFF(r1) @@ -279,4 +325,48 @@ _GLOBAL(kvmppc_restore_tm) ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr + +/* + * _kvmppc_restore_tm_pr() is a wrapper around __kvmppc_restore_tm(), so that it + * can be invoked from C function by PR KVM only. + */ +_GLOBAL(_kvmppc_restore_tm_pr) + mflr r5 + std r5, PPC_LR_STKOFF(r1) + stdu r1, -SWITCH_FRAME_SIZE(r1) + SAVE_NVGPRS(r1) + + /* save MSR to avoid TM/math bits change */ + mfmsr r5 + SAVE_GPR(5, r1) + + /* also save DSCR/CR so that it can be recovered later */ + mfspr r6, SPRN_DSCR + SAVE_GPR(6, r1) + + mfcr r7 + stw r7, _CCR(r1) + + bl __kvmppc_restore_tm + + ld r7, _CCR(r1) + mtcr r7 + + REST_GPR(6, r1) + mtspr SPRN_DSCR, r6 + + /* need preserve current MSR's MSR_TS bits */ + REST_GPR(5, r1) + mfmsr r6 + rldicl r6, r6, 64 - MSR_TS_S_LG, 62 + rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG + mtmsrd r5 + + REST_NVGPRS(r1) + addi r1, r1, SWITCH_FRAME_SIZE + ld r5, PPC_LR_STKOFF(r1) + mtlr r5 + blr + +EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ From 25ddda07b6e55a12065631e20f7f1e198230502f Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:51 +0800 Subject: [PATCH 34/63] KVM: PPC: Book3S PR: Transition to Suspended state when injecting interrupt This patch simulates interrupt behavior per Power ISA while injecting interrupt in PR KVM: - When interrupt happens, transactional state should be suspended. kvmppc_mmu_book3s_64_reset_msr() will be invoked when injecting an interrupt. This patch performs this ISA logic in kvmppc_mmu_book3s_64_reset_msr(). 
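For reference, the rule being implemented can be sketched in C as below. This is an illustration only (msr_on_interrupt() is a made-up helper name, not kernel code); the macros are the usual Book3S MSR definitions.

	/*
	 * An interrupt taken while TS is transactional (0b10) must leave
	 * the thread in suspended state (0b01); otherwise the current TS
	 * bits are carried into the new MSR unchanged.
	 */
	static unsigned long msr_on_interrupt(unsigned long intr_msr,
					      unsigned long cur_msr)
	{
		if (MSR_TM_TRANSACTIONAL(cur_msr))
			return intr_msr | MSR_TS_S;
		return intr_msr | (cur_msr & MSR_TS_MASK);
	}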
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index a93d719edc90..cf9d686e8162 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -38,7 +38,16 @@ static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) { - kvmppc_set_msr(vcpu, vcpu->arch.intr_msr); + unsigned long msr = vcpu->arch.intr_msr; + unsigned long cur_msr = kvmppc_get_msr(vcpu); + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(cur_msr)) + msr |= MSR_TS_S; + else + msr |= cur_msr & MSR_TS_MASK; + + kvmppc_set_msr(vcpu, msr); } static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( From 901938add3bd598bf641672a85e644ac07e77e9a Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:52 +0800 Subject: [PATCH 35/63] KVM: PPC: Book3S PR: Pass through MSR TM and TS bits to shadow_msr PowerPC TM functionality needs MSR TM/TS bits support in hardware level. Guest TM functionality can not be emulated with "fake" MSR (msr in magic page) TS bits. This patch syncs TM/TS bits in shadow_msr with the MSR value in magic page, so that the MSR TS value which guest sees is consistent with actual MSR bits running in guest. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b1aff9f83ed0..0a892be0abcb 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -312,7 +312,12 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) ulong smsr = guest_msr; /* Guest MSR values */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE | + MSR_TM | MSR_TS_MASK; +#else smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE; +#endif /* Process MSR values */ smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; /* External providers the guest reserved */ From 95757bfc72e9f08905bf6b68d5b8903db205e681 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:53 +0800 Subject: [PATCH 36/63] KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state guest MSR TS bits can be modified with non-privileged instruction such as tbegin./tend. That means guest can change MSR value "silently" without notifying host. It is necessary to sync the TM bits to host so that host can calculate shadow msr correctly. Note, privileged mode in the guest will always fail transactions so we only take care of problem state mode in the guest. The logic is put into kvmppc_copy_from_svcpu() so that kvmppc_handle_exit_pr() can use correct MSR TM bits even when preemption occurs. 
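As a minimal sketch of that sync (illustration only; the real logic is inlined in kvmppc_copy_from_svcpu() rather than split into a helper like this):

	static void sync_ts_from_shadow_srr1(struct kvm_vcpu *vcpu)
	{
		ulong msr = kvmppc_get_msr(vcpu);

		/* Privileged-state transactions always fail, so only a
		 * problem-state guest can change MSR[TS] without a trap. */
		if (!(msr & MSR_PR))
			return;

		if ((vcpu->arch.shadow_srr1 & MSR_TS_MASK) ==
		    (msr & MSR_TS_MASK))
			return;		/* TS bits did not change */

		msr &= ~MSR_TS_MASK;
		msr |= vcpu->arch.shadow_srr1 & MSR_TS_MASK;
		kvmppc_set_msr_fast(vcpu, msr);
		kvmppc_recalc_shadow_msr(vcpu);
	}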
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 73 ++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 0a892be0abcb..9369cd321417 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -182,10 +182,36 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) svcpu_put(svcpu); } +static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) +{ + ulong guest_msr = kvmppc_get_msr(vcpu); + ulong smsr = guest_msr; + + /* Guest MSR values */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE | + MSR_TM | MSR_TS_MASK; +#else + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE; +#endif + /* Process MSR values */ + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; + /* External providers the guest reserved */ + smsr |= (guest_msr & vcpu->arch.guest_owned_ext); + /* 64-bit Process MSR values */ +#ifdef CONFIG_PPC_BOOK3S_64 + smsr |= MSR_ISF | MSR_HV; +#endif + vcpu->arch.shadow_msr = smsr; +} + /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + ulong old_msr; +#endif /* * Maybe we were already preempted and synced the svcpu from @@ -228,6 +254,30 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb; if (cpu_has_feature(CPU_FTR_ARCH_207S)) vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Unlike other MSR bits, MSR[TS]bits can be changed at guest without + * notifying host: + * modified by unprivileged instructions like "tbegin"/"tend"/ + * "tresume"/"tsuspend" in PR KVM guest. + * + * It is necessary to sync here to calculate a correct shadow_msr. + * + * privileged guest's tbegin will be failed at present. So we + * only take care of problem state guest. 
+ */ + old_msr = kvmppc_get_msr(vcpu); + if (unlikely((old_msr & MSR_PR) && + (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) != + (old_msr & (MSR_TS_MASK)))) { + old_msr &= ~(MSR_TS_MASK); + old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)); + kvmppc_set_msr_fast(vcpu, old_msr); + kvmppc_recalc_shadow_msr(vcpu); + } +#endif + svcpu->in_use = false; out: @@ -306,29 +356,6 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) /*****************************************/ -static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) -{ - ulong guest_msr = kvmppc_get_msr(vcpu); - ulong smsr = guest_msr; - - /* Guest MSR values */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE | - MSR_TM | MSR_TS_MASK; -#else - smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE; -#endif - /* Process MSR values */ - smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; - /* External providers the guest reserved */ - smsr |= (guest_msr & vcpu->arch.guest_owned_ext); - /* 64-bit Process MSR values */ -#ifdef CONFIG_PPC_BOOK3S_64 - smsr |= MSR_ISF | MSR_HV; -#endif - vcpu->arch.shadow_msr = smsr; -} - static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) { ulong old_msr = kvmppc_get_msr(vcpu); From 401a89e9375c011de4e3271d50f27648b734a7cb Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:54 +0800 Subject: [PATCH 37/63] KVM: PPC: Book3S PR: Implement RFID TM behavior to suppress change from S0 to N0 According to ISA specification for RFID, in MSR TM disabled and TS suspended state (S0), if the target MSR is TM disabled and TS state is inactive (N0), rfid should suppress this update. This patch makes the RFID emulation of PR KVM consistent with this. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_emulate.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 68d68983948e..2eb457bc7b6e 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -117,11 +117,28 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case 19: switch (get_xop(inst)) { case OP_19_XOP_RFID: - case OP_19_XOP_RFI: + case OP_19_XOP_RFI: { + unsigned long srr1 = kvmppc_get_srr1(vcpu); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsigned long cur_msr = kvmppc_get_msr(vcpu); + + /* + * add rules to fit in ISA specification regarding TM + * state transistion in TM disable/Suspended state, + * and target TM state is TM inactive(00) state. (the + * change should be suppressed). + */ + if (((cur_msr & MSR_TM) == 0) && + ((srr1 & MSR_TM) == 0) && + MSR_TM_SUSPENDED(cur_msr) && + !MSR_TM_ACTIVE(srr1)) + srr1 |= MSR_TS_S; +#endif kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu)); - kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu)); + kvmppc_set_msr(vcpu, srr1); *advance = 0; break; + } default: emulated = EMULATE_FAIL; From 36383a0862b68fc14b63dd6c93c64f1f82b6e8a9 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:55 +0800 Subject: [PATCH 38/63] KVM: PPC: Book3S PR: Avoid changing TS bits when exiting guest PR KVM host usually runs with TM enabled in its host MSR value, and with non-transactional TS value. When a guest with TM active traps into PR KVM host, the rfid at the tail of kvmppc_interrupt_pr() will try to switch TS bits from S0 (Suspended & TM disabled) to N1 (Non-transactional & TM enabled). 
That will leads to TM Bad Thing interrupt. This patch manually sets target TS bits unchanged to avoid this exception. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_segment.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 93a180ceefad..98ccc7ec5d48 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -383,6 +383,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) */ PPC_LL r6, HSTATE_HOST_MSR(r13) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * We don't want to change MSR[TS] bits via rfi here. + * The actual TM handling logic will be in host with + * recovered DR/IR bits after HSTATE_VMHANDLER. + * And MSR_TM can be enabled in HOST_MSR so rfid may + * not suppress this change and can lead to exception. + * Manually set MSR to prevent TS state change here. + */ + mfmsr r7 + rldicl r7, r7, 64 - MSR_TS_S_LG, 62 + rldimi r6, r7, MSR_TS_S_LG, 63 - MSR_TS_T_LG +#endif PPC_LL r8, HSTATE_VMHANDLER(r13) #ifdef CONFIG_PPC64 From de7ad932190c6af17bc9075ddd40a084990c5eb3 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:56 +0800 Subject: [PATCH 39/63] KVM: PPC: Book3S PR: Add new kvmppc_copyto/from_vcpu_tm APIs This patch adds 2 new APIs: kvmppc_copyto_vcpu_tm() and kvmppc_copyfrom_vcpu_tm(). These 2 APIs will be used to copy from/to TM data between VCPU_TM/VCPU area. PR KVM will use these APIs for treclaim. or trechkpt. emulation. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_emulate.c | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 2eb457bc7b6e..f81a921e0865 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -87,6 +87,47 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level) return true; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.gpr_tm[0], &vcpu->arch.regs.gpr[0], + sizeof(vcpu->arch.gpr_tm)); + memcpy(&vcpu->arch.fp_tm, &vcpu->arch.fp, + sizeof(struct thread_fp_state)); + memcpy(&vcpu->arch.vr_tm, &vcpu->arch.vr, + sizeof(struct thread_vr_state)); + vcpu->arch.ppr_tm = vcpu->arch.ppr; + vcpu->arch.dscr_tm = vcpu->arch.dscr; + vcpu->arch.amr_tm = vcpu->arch.amr; + vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; + vcpu->arch.tar_tm = vcpu->arch.tar; + vcpu->arch.lr_tm = vcpu->arch.regs.link; + vcpu->arch.cr_tm = vcpu->arch.cr; + vcpu->arch.xer_tm = vcpu->arch.regs.xer; + vcpu->arch.vrsave_tm = vcpu->arch.vrsave; +} + +static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.regs.gpr[0], &vcpu->arch.gpr_tm[0], + sizeof(vcpu->arch.regs.gpr)); + memcpy(&vcpu->arch.fp, &vcpu->arch.fp_tm, + sizeof(struct thread_fp_state)); + memcpy(&vcpu->arch.vr, &vcpu->arch.vr_tm, + sizeof(struct thread_vr_state)); + vcpu->arch.ppr = vcpu->arch.ppr_tm; + vcpu->arch.dscr = vcpu->arch.dscr_tm; + vcpu->arch.amr = vcpu->arch.amr_tm; + vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; + vcpu->arch.tar = vcpu->arch.tar_tm; + vcpu->arch.regs.link = vcpu->arch.lr_tm; + vcpu->arch.cr = vcpu->arch.cr_tm; + vcpu->arch.regs.xer = vcpu->arch.xer_tm; + vcpu->arch.vrsave = vcpu->arch.vrsave_tm; +} + +#endif + int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { From 
66c33e796cf9d5f7150bf4c701786d0527b594b6 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:57 +0800 Subject: [PATCH 40/63] KVM: PPC: Book3S PR: Add kvmppc_save/restore_tm_sprs() APIs This patch adds 2 new APIs, kvmppc_save_tm_sprs() and kvmppc_restore_tm_sprs(), for the purpose of TEXASR/TFIAR/TFHAR save/restore. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9369cd321417..92e467ebadf0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "book3s.h" @@ -284,6 +285,27 @@ out: svcpu_put(svcpu); } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) +{ + tm_enable(); + vcpu->arch.tfhar = mfspr(SPRN_TFHAR); + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + vcpu->arch.tfiar = mfspr(SPRN_TFIAR); + tm_disable(); +} + +static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) +{ + tm_enable(); + mtspr(SPRN_TFHAR, vcpu->arch.tfhar); + mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + tm_disable(); +} + +#endif + static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) { int r = 1; /* Indicate we want to get back into the guest */ From 8d2e2fc5e082a7b3f858cefb6e65700f28d2955e Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:58 +0800 Subject: [PATCH 41/63] KVM: PPC: Book3S PR: Add transaction memory save/restore skeleton The transaction memory checkpoint area save/restore behavior is triggered when VCPU qemu process is switching out/into CPU, i.e. at kvmppc_core_vcpu_put_pr() and kvmppc_core_vcpu_load_pr(). MSR TM active state is determined by TS bits: active: 10(transactional) or 01 (suspended) inactive: 00 (non-transactional) We don't "fake" TM functionality for guest. We "sync" guest virtual MSR TM active state(10 or 01) with shadow MSR. That is to say, we don't emulate a transactional guest with a TM inactive MSR. TM SPR support(TFIAR/TFAR/TEXASR) has already been supported by commit 9916d57e64a4 ("KVM: PPC: Book3S PR: Expose TM registers"). Math register support (FPR/VMX/VSX) will be done at subsequent patch. Whether TM context need to be saved/restored can be determined by kvmppc_get_msr() TM active state: * TM active - save/restore TM context * TM inactive - no need to do so and only save/restore TM SPRs. 
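In simplified form, the save-side dispatch looks like the sketch below (an illustrative restatement of the code added in this patch, with comments and error paths trimmed):

	void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
	{
		if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
			/* inactive: no live checkpoint, save TM SPRs only */
			kvmppc_save_tm_sprs(vcpu);
			return;
		}

		/* active (TS = 01 or 10): save the full checkpointed state */
		preempt_disable();
		_kvmppc_save_tm_pr(vcpu, mfmsr());
		preempt_enable();
	}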
Signed-off-by: Simon Guo Suggested-by: Paul Mackerras Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 9 +++++++++ arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/kvm/book3s_pr.c | 27 +++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 20d3d5a87296..fc15ad9dfc3b 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -257,6 +257,15 @@ extern int kvmppc_hcall_impl_pr(unsigned long cmd); extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu); +void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu); +#else +static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {} +#endif + extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8dc5e439b387..fa4efa7e88f7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -627,7 +627,6 @@ struct kvm_vcpu_arch { struct thread_vr_state vr_tm; u32 vrsave_tm; /* also USPRG0 */ - #endif #ifdef CONFIG_KVM_EXIT_TIMING diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 92e467ebadf0..a14721f034fb 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "book3s.h" @@ -115,6 +116,8 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) if (kvmppc_is_split_real(vcpu)) kvmppc_fixup_split_real(vcpu); + + kvmppc_restore_tm_pr(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -134,6 +137,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + kvmppc_save_tm_pr(vcpu); /* Enable AIL if supported */ if (cpu_has_feature(CPU_FTR_HVMODE) && @@ -304,6 +308,29 @@ static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) tm_disable(); } +void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) +{ + if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) { + kvmppc_save_tm_sprs(vcpu); + return; + } + + preempt_disable(); + _kvmppc_save_tm_pr(vcpu, mfmsr()); + preempt_enable(); +} + +void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) +{ + if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) { + kvmppc_restore_tm_sprs(vcpu); + return; + } + + preempt_disable(); + _kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu)); + preempt_enable(); +} #endif static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) From 13989b65ebb74c05c577dbbcc111e1fdd7da763a Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:59 +0800 Subject: [PATCH 42/63] KVM: PPC: Book3S PR: Add math support for PR KVM HTM The math registers will be saved into vcpu->arch.fp/vr and corresponding vcpu->arch.fp_tm/vr_tm area. We flush or giveup the math regs into vcpu->arch.fp/vr before saving transaction. After transaction is restored, the math regs will be loaded back into regs. 
If there is a FP/VEC/VSX unavailable exception during transaction active state, the math checkpoint content might be incorrect and we need to do treclaim./load the correct checkpoint val/trechkpt. sequence to retry the transaction. That will make our solution complicated. To solve this issue, we always make the hardware guest MSR math bits (shadow_msr) consistent with the MSR val which guest sees (kvmppc_get_msr()) when guest msr is with tm enabled. Then all FP/VEC/VSX unavailable exception can be delivered to guest and guest handles the exception by itself. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a14721f034fb..dcb577fde9cd 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -308,6 +308,28 @@ static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) tm_disable(); } +/* loadup math bits which is enabled at kvmppc_get_msr() but not enabled at + * hardware. + */ +static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu) +{ + ulong exit_nr; + ulong ext_diff = (kvmppc_get_msr(vcpu) & ~vcpu->arch.guest_owned_ext) & + (MSR_FP | MSR_VEC | MSR_VSX); + + if (!ext_diff) + return; + + if (ext_diff == MSR_FP) + exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL; + else if (ext_diff == MSR_VEC) + exit_nr = BOOK3S_INTERRUPT_ALTIVEC; + else + exit_nr = BOOK3S_INTERRUPT_VSX; + + kvmppc_handle_ext(vcpu, exit_nr, ext_diff); +} + void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) { if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) { @@ -315,6 +337,8 @@ void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) return; } + kvmppc_giveup_ext(vcpu, MSR_VSX); + preempt_disable(); _kvmppc_save_tm_pr(vcpu, mfmsr()); preempt_enable(); @@ -324,12 +348,18 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) { if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) { kvmppc_restore_tm_sprs(vcpu); + if (kvmppc_get_msr(vcpu) & MSR_TM) + kvmppc_handle_lost_math_exts(vcpu); return; } preempt_disable(); _kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu)); preempt_enable(); + + if (kvmppc_get_msr(vcpu) & MSR_TM) + kvmppc_handle_lost_math_exts(vcpu); + } #endif @@ -468,6 +498,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) /* Preload FPU if it's enabled */ if (kvmppc_get_msr(vcpu) & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (kvmppc_get_msr(vcpu) & MSR_TM) + kvmppc_handle_lost_math_exts(vcpu); +#endif } void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) From 533082ae86e2f1ff6cb9eca7a25202a81fc0567e Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:00 +0800 Subject: [PATCH 43/63] KVM: PPC: Book3S PR: Emulate mtspr/mfspr using active TM SPRs The mfspr/mtspr on TM SPRs(TEXASR/TFIAR/TFHAR) are non-privileged instructions and can be executed by PR KVM guest in problem state without trapping into the host. We only emulate mtspr/mfspr texasr/tfiar/tfhar in guest PR=0 state. When we are emulating mtspr tm sprs in guest PR=0 state, the emulation result needs to be visible to guest PR=1 state. That is, the actual TM SPR val should be loaded into actual registers. We already flush TM SPRs into vcpu when switching out of CPU, and load TM SPRs when switching back. This patch corrects mfspr()/mtspr() emulation for TM SPRs to make the actual source/dest be the actual TM SPRs. 
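The corrected read path boils down to the sketch below (a hypothetical helper for illustration; the real emulation below also performs the CPU_FTR_TM and MSR[TM] facility checks before touching the SPRs):

	static unsigned long kvmppc_read_live_tm_spr(int sprn)
	{
		unsigned long val = 0;

		tm_enable();
		switch (sprn) {
		case SPRN_TFHAR:
			val = mfspr(SPRN_TFHAR);	/* actual SPR, not vcpu->arch.tfhar */
			break;
		case SPRN_TEXASR:
			val = mfspr(SPRN_TEXASR);
			break;
		case SPRN_TFIAR:
			val = mfspr(SPRN_TFIAR);
			break;
		}
		tm_disable();
		return val;
	}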
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/kvm/book3s_emulate.c | 58 ++++++++++++++++++++++----- arch/powerpc/kvm/book3s_pr.c | 2 +- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index fc15ad9dfc3b..43e8bb18c2d7 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -210,6 +210,7 @@ extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); +extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index f81a921e0865..c4e3ec63f253 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -24,6 +24,7 @@ #include #include #include "book3s.h" +#include #define OP_19_XOP_RFID 18 #define OP_19_XOP_RFI 50 @@ -523,13 +524,38 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) break; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case SPRN_TFHAR: - vcpu->arch.tfhar = spr_val; - break; case SPRN_TEXASR: - vcpu->arch.texasr = spr_val; - break; case SPRN_TFIAR: - vcpu->arch.tfiar = spr_val; + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)) && + !((MSR_TM_SUSPENDED(kvmppc_get_msr(vcpu))) && + (sprn == SPRN_TFHAR))) { + /* it is illegal to mtspr() TM regs in + * other than non-transactional state, with + * the exception of TFHAR in suspend state. 
+ */ + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + emulated = EMULATE_AGAIN; + break; + } + + tm_enable(); + if (sprn == SPRN_TFHAR) + mtspr(SPRN_TFHAR, spr_val); + else if (sprn == SPRN_TEXASR) + mtspr(SPRN_TEXASR, spr_val); + else + mtspr(SPRN_TFIAR, spr_val); + tm_disable(); + break; #endif #endif @@ -676,13 +702,25 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val break; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case SPRN_TFHAR: - *spr_val = vcpu->arch.tfhar; - break; case SPRN_TEXASR: - *spr_val = vcpu->arch.texasr; - break; case SPRN_TFIAR: - *spr_val = vcpu->arch.tfiar; + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + tm_enable(); + if (sprn == SPRN_TFHAR) + *spr_val = mfspr(SPRN_TFHAR); + else if (sprn == SPRN_TEXASR) + *spr_val = mfspr(SPRN_TEXASR); + else if (sprn == SPRN_TFIAR) + *spr_val = mfspr(SPRN_TFIAR); + tm_disable(); break; #endif #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index dcb577fde9cd..c0f45c83f683 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -918,7 +918,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_BOOK3S_64 -static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac) +void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac) { /* Inject the Interrupt Cause field and trigger a guest interrupt */ vcpu->arch.fscr &= ~(0xffULL << 56); From 5706340a339283fe60d55ddc72ee7728a571a834 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:01 +0800 Subject: [PATCH 44/63] KVM: PPC: Book3S PR: Always fail transactions in guest privileged state Currently the kernel doesn't use transaction memory. And there is an issue for privileged state in the guest that: tbegin/tsuspend/tresume/tabort TM instructions can impact MSR TM bits without trapping into the PR host. So following code will lead to a false mfmsr result: tbegin <- MSR bits update to Transaction active. beq <- failover handler branch mfmsr <- still read MSR bits from magic page with transaction inactive. It is not an issue for non-privileged guest state since its mfmsr is not patched with magic page and will always trap into the PR host. This patch will always fail tbegin attempt for privileged state in the guest, so that the above issue is prevented. It is benign since currently (guest) kernel doesn't initiate a transaction. 
Test case: https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_emulate.c | 40 +++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 11 +++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 43e8bb18c2d7..c1cea8222d51 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -262,9 +262,11 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu); void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu); +void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu); #else static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} #endif extern int kvm_irq_bypass; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c4e3ec63f253..570339b03feb 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "book3s.h" #include @@ -48,6 +49,8 @@ #define OP_31_XOP_EIOIO 854 #define OP_31_XOP_SLBMFEE 915 +#define OP_31_XOP_TBEGIN 654 + /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ #define OP_31_XOP_DCBZ 1010 @@ -363,6 +366,43 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case OP_31_XOP_TBEGIN: + { + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + if (!(kvmppc_get_msr(vcpu) & MSR_PR)) { + preempt_disable(); + vcpu->arch.cr = (CR0_TBEGIN_FAILURE | + (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT))); + + vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT | + (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) + << TEXASR_FC_LG)); + + if ((inst >> 21) & 0x1) + vcpu->arch.texasr |= TEXASR_ROT; + + if (kvmppc_get_msr(vcpu) & MSR_HV) + vcpu->arch.texasr |= TEXASR_HV; + + vcpu->arch.tfhar = kvmppc_get_pc(vcpu) + 4; + vcpu->arch.tfiar = kvmppc_get_pc(vcpu); + + kvmppc_restore_tm_sprs(vcpu); + preempt_enable(); + } else + emulated = EMULATE_FAIL; + break; + } +#endif default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c0f45c83f683..cc26be87e3b8 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -206,6 +206,15 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 smsr |= MSR_ISF | MSR_HV; +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * in guest privileged state, we want to fail all TM transactions. + * So disable MSR TM bit so that all tbegin. will be able to be + * trapped into host. 
+ */ + if (!(guest_msr & MSR_PR)) + smsr &= ~MSR_TM; #endif vcpu->arch.shadow_msr = smsr; } @@ -299,7 +308,7 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) tm_disable(); } -static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) +void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) { tm_enable(); mtspr(SPRN_TFHAR, vcpu->arch.tfhar); From 19c585eb45e360db43790c6724a3ea929ea03de3 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:02 +0800 Subject: [PATCH 45/63] KVM: PPC: Book3S PR: Restore NV regs after emulating mfspr from TM SPRs Currently kvmppc_handle_fac() will not update NV GPRs and thus it can return with GUEST_RESUME. However PR KVM guest always disables MSR_TM bit in privileged state. If PR privileged-state guest is trying to read TM SPRs, it will trigger TM facility unavailable exception and fall into kvmppc_handle_fac(). Then the emulation will be done by kvmppc_core_emulate_mfspr_pr(). The mfspr instruction can include a RT with NV reg. So it is necessary to restore NV GPRs at this case, to reflect the update to NV RT. This patch make kvmppc_handle_fac() return GUEST_RESUME_NV for TM facility unavailable exceptions in guest privileged state. Signed-off-by: Simon Guo Reviewed-by: Paul Mackerras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc26be87e3b8..bb9209947db9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -989,6 +989,18 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) break; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Since we disabled MSR_TM at privilege state, the mfspr instruction + * for TM spr can trigger TM fac unavailable. In this case, the + * emulation is handled by kvmppc_emulate_fac(), which invokes + * kvmppc_emulate_mfspr() finally. But note the mfspr can include + * RT for NV registers. So it need to restore those NV reg to reflect + * the update. + */ + if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR)) + return RESUME_GUEST_NV; +#endif + return RESUME_GUEST; } @@ -1350,8 +1362,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: - kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); - r = RESUME_GUEST; + r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); break; #endif case BOOK3S_INTERRUPT_MACHINE_CHECK: From 03c81682a90b3fc3434d3e33f3f4c7f1f39d65cd Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:03 +0800 Subject: [PATCH 46/63] KVM: PPC: Book3S PR: Add emulation for treclaim. This patch adds support for "treclaim." emulation when PR KVM guest executes treclaim. and traps to host. We will firstly do treclaim. and save TM checkpoint. Then it is necessary to update vcpu current reg content with checkpointed vals. When rfid into guest again, those vcpu current reg content (now the checkpoint vals) will be loaded into regs. 
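One detail worth calling out is the CR0 result of treclaim., which is 0 || MSR[TS] || 0. A hypothetical helper showing how that field is derived (the emulation below computes the same value inline before shifting it into place with CR0_SHIFT):

	static unsigned int treclaim_cr0_field(unsigned long guest_msr)
	{
		/* bits 2:1 of the CR0 field carry the pre-treclaim TS value */
		return ((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) & 0xf;
	}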
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_emulate.c | 76 +++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 570339b03feb..04c29e01b391 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -51,6 +51,8 @@ #define OP_31_XOP_TBEGIN 654 +#define OP_31_XOP_TRECLAIM 942 + /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ #define OP_31_XOP_DCBZ 1010 @@ -130,6 +132,46 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu) vcpu->arch.vrsave = vcpu->arch.vrsave_tm; } +static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) +{ + unsigned long guest_msr = kvmppc_get_msr(vcpu); + int fc_val = ra_val ? ra_val : 1; + + /* CR0 = 0 | MSR[TS] | 0 */ + vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | + (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) + << CR0_SHIFT); + + preempt_disable(); + kvmppc_save_tm_pr(vcpu); + kvmppc_copyfrom_vcpu_tm(vcpu); + + tm_enable(); + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + /* failure recording depends on Failure Summary bit */ + if (!(vcpu->arch.texasr & TEXASR_FS)) { + vcpu->arch.texasr &= ~TEXASR_FC; + vcpu->arch.texasr |= ((u64)fc_val << TEXASR_FC_LG); + + vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV); + if (kvmppc_get_msr(vcpu) & MSR_PR) + vcpu->arch.texasr |= TEXASR_PR; + + if (kvmppc_get_msr(vcpu) & MSR_HV) + vcpu->arch.texasr |= TEXASR_HV; + + vcpu->arch.tfiar = kvmppc_get_pc(vcpu); + mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + } + tm_disable(); + /* + * treclaim need quit to non-transactional state. + */ + guest_msr &= ~(MSR_TS_MASK); + kvmppc_set_msr(vcpu, guest_msr); + preempt_enable(); +} #endif int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -402,6 +444,40 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_FAIL; break; } + case OP_31_XOP_TRECLAIM: + { + ulong guest_msr = kvmppc_get_msr(vcpu); + unsigned long ra_val = 0; + + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + /* generate interrupts based on priorities */ + if (guest_msr & MSR_PR) { + /* Privileged Instruction type Program Interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + emulated = EMULATE_AGAIN; + break; + } + + if (!MSR_TM_ACTIVE(guest_msr)) { + /* TM bad thing interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + emulated = EMULATE_AGAIN; + break; + } + + if (ra) + ra_val = kvmppc_get_gpr(vcpu, ra); + kvmppc_emulate_treclaim(vcpu, ra_val); + break; + } #endif default: emulated = EMULATE_FAIL; From e32c53d1cf0022af180548474f4c29c189d37d93 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:04 +0800 Subject: [PATCH 47/63] KVM: PPC: Book3S PR: Add emulation for trechkpt. This patch adds host emulation when guest PR KVM executes "trechkpt.", which is a privileged instruction and will trap into host. We firstly copy vcpu ongoing content into vcpu tm checkpoint content, then perform kvmppc_restore_tm_pr() to do trechkpt. with updated vcpu tm checkpoint values. 
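The overall order of operations can be sketched as below (a simplified restatement of the kvmppc_emulate_trchkpt() body added in this patch; a later patch also flushes TAR at this point):

	preempt_disable();
	kvmppc_giveup_ext(vcpu, MSR_VSX);	/* flush FP/VEC/VSX into the vcpu first */
	kvmppc_copyto_vcpu_tm(vcpu);		/* current regs become the checkpoint */
	kvmppc_save_tm_sprs(vcpu);

	/* trecheckpoint. leaves the thread in suspended state */
	guest_msr = (guest_msr & ~MSR_TS_MASK) | MSR_TS_S;
	kvmppc_set_msr(vcpu, guest_msr);

	kvmppc_restore_tm_pr(vcpu);		/* performs the actual trechkpt. */
	preempt_enable();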
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 + arch/powerpc/kvm/book3s_emulate.c | 61 +++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 2 +- 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index c1cea8222d51..2940de7bac08 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -262,10 +262,12 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu); void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu); +void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu); void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu); #else static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} #endif diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 04c29e01b391..b7530cf58834 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -52,6 +52,7 @@ #define OP_31_XOP_TBEGIN 654 #define OP_31_XOP_TRECLAIM 942 +#define OP_31_XOP_TRCHKPT 1006 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ #define OP_31_XOP_DCBZ 1010 @@ -172,6 +173,29 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) kvmppc_set_msr(vcpu, guest_msr); preempt_enable(); } + +static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu) +{ + unsigned long guest_msr = kvmppc_get_msr(vcpu); + + preempt_disable(); + /* + * need flush FP/VEC/VSX to vcpu save area before + * copy. + */ + kvmppc_giveup_ext(vcpu, MSR_VSX); + kvmppc_copyto_vcpu_tm(vcpu); + kvmppc_save_tm_sprs(vcpu); + + /* + * as a result of trecheckpoint. set TS to suspended. 
+ */ + guest_msr &= ~(MSR_TS_MASK); + guest_msr |= MSR_TS_S; + kvmppc_set_msr(vcpu, guest_msr); + kvmppc_restore_tm_pr(vcpu); + preempt_enable(); +} #endif int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -478,6 +502,43 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_emulate_treclaim(vcpu, ra_val); break; } + case OP_31_XOP_TRCHKPT: + { + ulong guest_msr = kvmppc_get_msr(vcpu); + unsigned long texasr; + + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + /* generate interrupt based on priorities */ + if (guest_msr & MSR_PR) { + /* Privileged Instruction type Program Intr */ + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + emulated = EMULATE_AGAIN; + break; + } + + tm_enable(); + texasr = mfspr(SPRN_TEXASR); + tm_disable(); + + if (MSR_TM_ACTIVE(guest_msr) || + !(texasr & (TEXASR_FS))) { + /* TM bad thing interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + emulated = EMULATE_AGAIN; + break; + } + + kvmppc_emulate_trchkpt(vcpu); + break; + } #endif default: emulated = EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bb9209947db9..a275f8b3a4a0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -299,7 +299,7 @@ out: } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) +void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) { tm_enable(); vcpu->arch.tfhar = mfspr(SPRN_TFHAR); From 26798f88d58dff1b61abf04becf5055e6f860d4f Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:05 +0800 Subject: [PATCH 48/63] KVM: PPC: Book3S PR: Add emulation for tabort. in privileged state Currently privileged-state guest will be run with TM disabled. Although the privileged-state guest cannot initiate a new transaction, it can use tabort to terminate its problem state's transaction. So it is still necessary to emulate tabort. for privileged-state guest. Tested with: https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_emulate.c | 68 +++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index b7530cf58834..34f910e03972 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -50,6 +50,7 @@ #define OP_31_XOP_SLBMFEE 915 #define OP_31_XOP_TBEGIN 654 +#define OP_31_XOP_TABORT 910 #define OP_31_XOP_TRECLAIM 942 #define OP_31_XOP_TRCHKPT 1006 @@ -196,6 +197,47 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu) kvmppc_restore_tm_pr(vcpu); preempt_enable(); } + +/* emulate tabort. at guest privilege state */ +static void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) +{ + /* currently we only emulate tabort. but no emulation of other + * tabort variants since there is no kernel usage of them at + * present. + */ + unsigned long guest_msr = kvmppc_get_msr(vcpu); + + preempt_disable(); + tm_enable(); + tm_abort(ra_val); + + /* CR0 = 0 | MSR[TS] | 0 */ + vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | + (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) + << CR0_SHIFT); + + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + /* failure recording depends on Failure Summary bit, + * and tabort will be treated as nops in non-transactional + * state. 
+ */ + if (!(vcpu->arch.texasr & TEXASR_FS) && + MSR_TM_ACTIVE(guest_msr)) { + vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV); + if (guest_msr & MSR_PR) + vcpu->arch.texasr |= TEXASR_PR; + + if (guest_msr & MSR_HV) + vcpu->arch.texasr |= TEXASR_HV; + + vcpu->arch.tfiar = kvmppc_get_pc(vcpu); + mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + } + tm_disable(); + preempt_enable(); +} + #endif int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -468,6 +510,32 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_FAIL; break; } + case OP_31_XOP_TABORT: + { + ulong guest_msr = kvmppc_get_msr(vcpu); + unsigned long ra_val = 0; + + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + /* only emulate for privilege guest, since problem state + * guest can run with TM enabled and we don't expect to + * trap at here for that case. + */ + WARN_ON(guest_msr & MSR_PR); + + if (ra) + ra_val = kvmppc_get_gpr(vcpu, ra); + + kvmppc_emulate_tabort(vcpu, ra_val); + break; + } case OP_31_XOP_TRECLAIM: { ulong guest_msr = kvmppc_get_msr(vcpu); From 68ab07b985764ec5be816e7054a84b7ad121afc7 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:06 +0800 Subject: [PATCH 49/63] KVM: PPC: Book3S PR: Add guard code to prevent returning to guest with PR=0 and Transactional state Currently PR KVM doesn't support transaction memory in guest privileged state. This patch adds a check at setting guest msr, so that we can never return to guest with PR=0 and TS=0b10. A tabort will be emulated to indicate this and fail transaction immediately. [paulus@ozlabs.org - don't change the TM_CAUSE_MISC definition, instead use TM_CAUSE_KVM_FAC_UNAV.] Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.h | 6 ++++++ arch/powerpc/kvm/book3s_emulate.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 13 ++++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4ad5e287b8bc..14ef03501d21 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -31,4 +31,10 @@ extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, extern int kvmppc_book3s_init_pr(void); extern void kvmppc_book3s_exit_pr(void); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); +#else +static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} +#endif + #endif diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 34f910e03972..67d0fb40e8b2 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -199,7 +199,7 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu) } /* emulate tabort. at guest privilege state */ -static void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) +void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) { /* currently we only emulate tabort. 
but no emulation of other * tabort variants since there is no kernel usage of them at diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a275f8b3a4a0..ad0a2ee8d8b1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -446,12 +446,23 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) { - ulong old_msr = kvmppc_get_msr(vcpu); + ulong old_msr; #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* We should never target guest MSR to TS=10 && PR=0, + * since we always fail transaction for guest privilege + * state. + */ + if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr)) + kvmppc_emulate_tabort(vcpu, + TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT); +#endif + + old_msr = kvmppc_get_msr(vcpu); msr &= to_book3s(vcpu)->msr_mask; kvmppc_set_msr_fast(vcpu, msr); kvmppc_recalc_shadow_msr(vcpu); From 7284ca8a5eaee311d2e4aec73b2df9bd57e0cdcb Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:07 +0800 Subject: [PATCH 50/63] KVM: PPC: Book3S PR: Support TAR handling for PR KVM HTM Currently guest kernel doesn't handle TAR facility unavailable and it always runs with TAR bit on. PR KVM will lazily enable TAR. TAR is not a frequent-use register and it is not included in SVCPU struct. Due to the above, the checkpointed TAR val might be a bogus TAR val. To solve this issue, we will make vcpu->arch.fscr tar bit consistent with shadow_fscr when TM is enabled. At the end of emulating treclaim., the correct TAR val need to be loaded into the register if FSCR_TAR bit is on. At the beginning of emulating trechkpt., TAR needs to be flushed so that the right tar val can be copied into tar_tm. Tested with: tools/testing/selftests/powerpc/tm/tm-tar tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar (remove DSCR/PPR related testing). Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_emulate.c | 4 ++++ arch/powerpc/kvm/book3s_pr.c | 23 ++++++++++++++++++----- arch/powerpc/kvm/tm.S | 16 ++++++++++++++-- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 2940de7bac08..1f345a0b6ba2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -271,6 +271,8 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} #endif +void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); + extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 67d0fb40e8b2..fdbc695038dc 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -173,6 +173,9 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) guest_msr &= ~(MSR_TS_MASK); kvmppc_set_msr(vcpu, guest_msr); preempt_enable(); + + if (vcpu->arch.shadow_fscr & FSCR_TAR) + mtspr(SPRN_TAR, vcpu->arch.tar); } static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu) @@ -185,6 +188,7 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu) * copy. 
*/ kvmppc_giveup_ext(vcpu, MSR_VSX); + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); kvmppc_copyto_vcpu_tm(vcpu); kvmppc_save_tm_sprs(vcpu); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ad0a2ee8d8b1..eaf0c4f03c47 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -55,7 +55,9 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr); -static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); +#ifdef CONFIG_PPC_BOOK3S_64 +static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac); +#endif /* Some compatibility defines */ #ifdef CONFIG_PPC_BOOK3S_32 @@ -346,6 +348,7 @@ void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) return; } + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); kvmppc_giveup_ext(vcpu, MSR_VSX); preempt_disable(); @@ -357,8 +360,11 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) { if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) { kvmppc_restore_tm_sprs(vcpu); - if (kvmppc_get_msr(vcpu) & MSR_TM) + if (kvmppc_get_msr(vcpu) & MSR_TM) { kvmppc_handle_lost_math_exts(vcpu); + if (vcpu->arch.fscr & FSCR_TAR) + kvmppc_handle_fac(vcpu, FSCR_TAR_LG); + } return; } @@ -366,9 +372,11 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) _kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu)); preempt_enable(); - if (kvmppc_get_msr(vcpu) & MSR_TM) + if (kvmppc_get_msr(vcpu) & MSR_TM) { kvmppc_handle_lost_math_exts(vcpu); - + if (vcpu->arch.fscr & FSCR_TAR) + kvmppc_handle_fac(vcpu, FSCR_TAR_LG); + } } #endif @@ -819,7 +827,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) } /* Give up facility (TAR / EBB / DSCR) */ -static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) +void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) { #ifdef CONFIG_PPC_BOOK3S_64 if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) { @@ -1020,7 +1028,12 @@ void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { /* TAR got dropped, drop it in shadow too */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } else if (!(vcpu->arch.fscr & FSCR_TAR) && (fscr & FSCR_TAR)) { + vcpu->arch.fscr = fscr; + kvmppc_handle_fac(vcpu, FSCR_TAR_LG); + return; } + vcpu->arch.fscr = fscr; } #endif diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index 4a68dd4050a4..90e330f21356 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -172,15 +172,21 @@ _GLOBAL(_kvmppc_save_tm_pr) mfmsr r5 SAVE_GPR(5, r1) - /* also save DSCR/CR so that it can be recovered later */ + /* also save DSCR/CR/TAR so that it can be recovered later */ mfspr r6, SPRN_DSCR SAVE_GPR(6, r1) mfcr r7 stw r7, _CCR(r1) + mfspr r8, SPRN_TAR + SAVE_GPR(8, r1) + bl __kvmppc_save_tm + REST_GPR(8, r1) + mtspr SPRN_TAR, r8 + ld r7, _CCR(r1) mtcr r7 @@ -340,15 +346,21 @@ _GLOBAL(_kvmppc_restore_tm_pr) mfmsr r5 SAVE_GPR(5, r1) - /* also save DSCR/CR so that it can be recovered later */ + /* also save DSCR/CR/TAR so that it can be recovered later */ mfspr r6, SPRN_DSCR SAVE_GPR(6, r1) mfcr r7 stw r7, _CCR(r1) + mfspr r8, SPRN_TAR + SAVE_GPR(8, r1) + bl __kvmppc_restore_tm + REST_GPR(8, r1) + mtspr SPRN_TAR, r8 + ld r7, _CCR(r1) mtcr r7 From d234d68eb7464c9ebd11e870404e83d3e9348406 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:08 +0800 Subject: [PATCH 51/63] KVM: PPC: Book3S PR: Enable HTM for PR KVM for KVM_CHECK_EXTENSION ioctl With current patch set, PR KVM now supports HTM. So this patch turns it on for PR KVM. 
Tested with: https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 05eccdc10fdd..b8247fac3e56 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -648,9 +648,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_CAP_PPC_HTM: - r = hv_enabled && - (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); + r = !!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || + (hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); break; #endif default: From b3cebfe8c1cadf1817939dcc3688a2504a69c662 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:09 +0800 Subject: [PATCH 52/63] KVM: PPC: Move vcpu_load/vcpu_put down to each ioctl case in kvm_arch_vcpu_ioctl Although we already have kvm_arch_vcpu_async_ioctl() which doesn't require ioctl to load vcpu, the sync ioctl code need to be cleaned up when CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL is not configured. This patch moves vcpu_load/vcpu_put down to each ioctl switch case so that each ioctl can decide to do vcpu_load/vcpu_put or not independently. Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b8247fac3e56..c2c3477af746 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1980,16 +1980,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void __user *argp = (void __user *)arg; long r; - vcpu_load(vcpu); - switch (ioctl) { case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; r = -EFAULT; + vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + vcpu_put(vcpu); break; } @@ -1998,12 +1998,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_one_reg reg; r = -EFAULT; + vcpu_load(vcpu); if (copy_from_user(®, argp, sizeof(reg))) goto out; if (ioctl == KVM_SET_ONE_REG) r = kvm_vcpu_ioctl_set_one_reg(vcpu, ®); else r = kvm_vcpu_ioctl_get_one_reg(vcpu, ®); + vcpu_put(vcpu); break; } @@ -2011,9 +2013,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; + vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); + vcpu_put(vcpu); break; } #endif @@ -2022,7 +2026,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: - vcpu_put(vcpu); return r; } From c8235c2891d0561960fd5d62fdda58cfb6e503b2 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:10 +0800 Subject: [PATCH 53/63] KVM: PPC: Remove load/put vcpu for KVM_GET/SET_ONE_REG ioctl Since the vcpu mutex locking/unlock has been moved out of vcpu_load() /vcpu_put(), KVM_GET_ONE_REG and KVM_SET_ONE_REG doesn't need to do ioctl with loading vcpu anymore. This patch removes vcpu_load()/vcpu_put() from KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctl. 
Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 2 -- 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c2c3477af746..4cb377605167 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1998,14 +1998,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_one_reg reg; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&reg, argp, sizeof(reg))) goto out; if (ioctl == KVM_SET_ONE_REG) r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg); else r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg); - vcpu_put(vcpu); break; }

From 8f04412699e42c7fb5f68a847b1531ad3211b523 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:11 +0800 Subject: [PATCH 54/63] KVM: PPC: Book3S: Remove load/put vcpu for KVM_GET_REGS/KVM_SET_REGS

In both HV and PR KVM, the KVM_SET_REGS/KVM_GET_REGS ioctls can be performed without the vcpu being loaded. Since the vcpu mutex locking/unlocking has been moved out of vcpu_load()/vcpu_put(), KVM_SET_REGS/KVM_GET_REGS no longer need the vcpu to be loaded for the ioctl. This patch removes vcpu_load()/vcpu_put() from the KVM_SET_REGS/KVM_GET_REGS ioctls.

Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.c | 6 ------ 1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 320cdcf84591..309c8cf8fed4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -509,8 +509,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - regs->pc = kvmppc_get_pc(vcpu); regs->cr = kvmppc_get_cr(vcpu); regs->ctr = kvmppc_get_ctr(vcpu); @@ -532,7 +530,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); - vcpu_put(vcpu); return 0; } @@ -540,8 +537,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - kvmppc_set_pc(vcpu, regs->pc); kvmppc_set_cr(vcpu, regs->cr); kvmppc_set_ctr(vcpu, regs->ctr); @@ -562,7 +557,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); - vcpu_put(vcpu); return 0; }

From deeb879de955f4e04be3d43c33bc311087f063bf Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:02:12 +0800 Subject: [PATCH 55/63] KVM: PPC: Book3S PR: Enable kvmppc_get/set_one_reg_pr() for HTM registers

PR KVM needs to support migration while a transaction is active, and userspace will use the kvmppc_get_one_reg_pr()/kvmppc_set_one_reg_pr() APIs to get/set the transaction checkpoint state. This patch adds support for that. So far, QEMU on PR KVM does not fully support migration, but savevm/loadvm can be done against a RHEL72 guest. During the savevm/loadvm procedure, these kvm ioctls are invoked as well.
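For illustration, the userspace side of saving one piece of the checkpointed state looks roughly like the minimal sketch below. This is not QEMU's migration code; KVM_REG_PPC_TM_TAR is simply one of the register ids handled by the new get/set cases below, and the mirror-image KVM_SET_ONE_REG call restores the value on the destination vcpu.

/* Minimal sketch: read the checkpointed TAR of a vcpu via the ONE_REG API. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int save_tm_tar(int vcpu_fd, uint64_t *out)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_TM_TAR,
		.addr = (uintptr_t)out,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}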
Test has been performed to savevm/loadvm for a guest running a HTM test program: https://github.com/justdoitqd/publicFiles/blob/master/test-tm-mig.c Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 133 +++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index eaf0c4f03c47..e96ead92ae48 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1539,6 +1539,73 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, else *val = get_reg_val(id, 0); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case KVM_REG_PPC_TFHAR: + *val = get_reg_val(id, vcpu->arch.tfhar); + break; + case KVM_REG_PPC_TFIAR: + *val = get_reg_val(id, vcpu->arch.tfiar); + break; + case KVM_REG_PPC_TEXASR: + *val = get_reg_val(id, vcpu->arch.texasr); + break; + case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: + *val = get_reg_val(id, + vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]); + break; + case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: + { + int i, j; + + i = id - KVM_REG_PPC_TM_VSR0; + if (i < 32) + for (j = 0; j < TS_FPRWIDTH; j++) + val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j]; + else { + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + val->vval = vcpu->arch.vr_tm.vr[i-32]; + else + r = -ENXIO; + } + break; + } + case KVM_REG_PPC_TM_CR: + *val = get_reg_val(id, vcpu->arch.cr_tm); + break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; + case KVM_REG_PPC_TM_LR: + *val = get_reg_val(id, vcpu->arch.lr_tm); + break; + case KVM_REG_PPC_TM_CTR: + *val = get_reg_val(id, vcpu->arch.ctr_tm); + break; + case KVM_REG_PPC_TM_FPSCR: + *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr); + break; + case KVM_REG_PPC_TM_AMR: + *val = get_reg_val(id, vcpu->arch.amr_tm); + break; + case KVM_REG_PPC_TM_PPR: + *val = get_reg_val(id, vcpu->arch.ppr_tm); + break; + case KVM_REG_PPC_TM_VRSAVE: + *val = get_reg_val(id, vcpu->arch.vrsave_tm); + break; + case KVM_REG_PPC_TM_VSCR: + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]); + else + r = -ENXIO; + break; + case KVM_REG_PPC_TM_DSCR: + *val = get_reg_val(id, vcpu->arch.dscr_tm); + break; + case KVM_REG_PPC_TM_TAR: + *val = get_reg_val(id, vcpu->arch.tar_tm); + break; +#endif default: r = -EINVAL; break; @@ -1572,6 +1639,72 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case KVM_REG_PPC_TFHAR: + vcpu->arch.tfhar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFIAR: + vcpu->arch.tfiar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TEXASR: + vcpu->arch.texasr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: + vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] = + set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VSR0 ... 
KVM_REG_PPC_TM_VSR63: + { + int i, j; + + i = id - KVM_REG_PPC_TM_VSR0; + if (i < 32) + for (j = 0; j < TS_FPRWIDTH; j++) + vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j]; + else + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vr_tm.vr[i-32] = val->vval; + else + r = -ENXIO; + break; + } + case KVM_REG_PPC_TM_CR: + vcpu->arch.cr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_LR: + vcpu->arch.lr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_CTR: + vcpu->arch.ctr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_FPSCR: + vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_AMR: + vcpu->arch.amr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_PPR: + vcpu->arch.ppr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VRSAVE: + vcpu->arch.vrsave_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VSCR: + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); + else + r = -ENXIO; + break; + case KVM_REG_PPC_TM_DSCR: + vcpu->arch.dscr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_TAR: + vcpu->arch.tar_tm = set_reg_val(id, *val); + break; +#endif default: r = -EINVAL; break; From b71dc519a993d10f5db416c82b174f60e644ac3a Mon Sep 17 00:00:00 2001 From: Cameron Kaiser Date: Tue, 5 Jun 2018 07:48:55 -0700 Subject: [PATCH 56/63] KVM: PPC: Book3S PR: Handle additional interrupt types This adds trivial handling for additional interrupt types that KVM-PR must support for proper virtualization on a POWER9 host in HPT mode, as a further prerequisite to enabling KVM-PR on that configuration. Signed-off-by: Cameron Kaiser Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e96ead92ae48..a3569165a970 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1253,10 +1253,13 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_EXTERNAL_LEVEL: case BOOK3S_INTERRUPT_EXTERNAL_HV: + case BOOK3S_INTERRUPT_H_VIRT: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: + case BOOK3S_INTERRUPT_SYSTEM_RESET: r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_PROGRAM: From 916ccadccdcd8a0b7184dce37066a9fb2f9b4195 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 7 Jun 2018 18:04:37 +1000 Subject: [PATCH 57/63] KVM: PPC: Book3S PR: Fix MSR setting when delivering interrupts This makes sure that MSR "partial-function" bits are not transferred to SRR1 when delivering an interrupt. This was causing failures in guests running kernels that include commit f3d96e698ed0 ("powerpc/mm: Overhaul handling of bad page faults", 2017-07-19), which added code to check bits of SRR1 on instruction storage interrupts (ISIs) that indicate a bad page fault. The symptom was that a guest user program that handled a signal and attempted to return from the signal handler would get a SIGBUS signal and die. The code that generated ISIs and some other interrupts would previously set bits in the guest MSR to indicate the interrupt status and then call kvmppc_book3s_queue_irqprio(). This technique no longer works now that kvmppc_inject_interrupt() is masking off those bits. 
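The mask applied in the hunk below, 0x783f0000, covers SRR1 bits 33-36 and 42-47 (IBM bit numbering), i.e. the fields that interrupts load with interrupt-specific status rather than with a copy of the MSR. As a rough restatement of the new computation (illustration only, not the patch code):

/*
 * Illustrative restatement: bits 33-36 and 42-47 of SRR1 (mask 0x783f0000)
 * come from the interrupt 'flags'; everything else is copied from the
 * guest MSR.
 */
static inline unsigned long srr1_for_interrupt(unsigned long guest_msr,
					       unsigned long flags)
{
	return (guest_msr & ~0x783f0000ul) | flags;
}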
Instead we make kvmppc_core_queue_data_storage() and kvmppc_core_queue_inst_storage() call kvmppc_inject_interrupt() directly, and make sure that all the places that generate ISIs or DSIs call kvmppc_core_queue_{data,inst}_storage instead of kvmppc_book3s_queue_irqprio(). Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.c | 13 +++++------ arch/powerpc/kvm/book3s_pr.c | 42 +++++++++++++++--------------------- 2 files changed, 22 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 309c8cf8fed4..edaf4720d156 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -134,7 +134,7 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); + kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -256,18 +256,15 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, { kvmppc_set_dar(vcpu, dar); kvmppc_set_dsisr(vcpu, flags); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0); } -EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */ +EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) { - u64 msr = kvmppc_get_msr(vcpu); - msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); - msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, flags); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_inst_storage); static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a3569165a970..e8036ddeded1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -728,24 +728,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.may_execute = !data; } - if (page_found == -ENOENT) { - /* Page not found in guest PTE entries */ - u64 ssrr1 = vcpu->arch.shadow_srr1; - u64 msr = kvmppc_get_msr(vcpu); - kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); - kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr); - kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL)); - kvmppc_book3s_queue_irqprio(vcpu, vec); - } else if (page_found == -EPERM) { - /* Storage protection */ - u32 dsisr = vcpu->arch.fault_dsisr; - u64 ssrr1 = vcpu->arch.shadow_srr1; - u64 msr = kvmppc_get_msr(vcpu); - kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); - dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT; - kvmppc_set_dsisr(vcpu, dsisr); - kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL)); - kvmppc_book3s_queue_irqprio(vcpu, vec); + if (page_found == -ENOENT || page_found == -EPERM) { + /* Page not found in guest PTE entries, or protection fault */ + u64 flags; + + if (page_found == -EPERM) + flags = DSISR_PROTFAULT; + else + flags = DSISR_NOHPTE; + if (data) { + flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE; + kvmppc_core_queue_data_storage(vcpu, eaddr, flags); + } else { + kvmppc_core_queue_inst_storage(vcpu, flags); + } } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ 
kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); @@ -1178,10 +1174,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } else { - u64 msr = kvmppc_get_msr(vcpu); - msr |= shadow_srr1 & 0x58000000; - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + kvmppc_core_queue_inst_storage(vcpu, + shadow_srr1 & 0x58000000); r = RESUME_GUEST; } break; @@ -1220,9 +1214,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); srcu_read_unlock(&vcpu->kvm->srcu, idx); } else { - kvmppc_set_dar(vcpu, dar); - kvmppc_set_dsisr(vcpu, fault_dsisr); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr); r = RESUME_GUEST; } break; From a50623fb57002207b9b2636e1d3bd5cf17a3e74f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 7 Jun 2018 18:06:21 +1000 Subject: [PATCH 58/63] KVM: PPC: Book3S PR: Fix failure status setting in treclaim. emulation The treclaim. emulation needs to record failure status in the TEXASR register if the transaction had not previously failed. However, the current code first does kvmppc_save_tm_pr() (which does a treclaim. itself) and then checks the failure summary bit in TEXASR after that. Since treclaim. itself causes transaction failure, the FS bit is always set, so we were never updating TEXASR with the failure cause supplied by the guest as the RA parameter to the treclaim. instruction. This caused the tm-unavailable test in tools/testing/selftests/powerpc/tm to fail. To fix this, we need to read TEXASR before calling kvmppc_save_tm_pr(), and base the final value of TEXASR on that value. Fixes: 03c81682a90b ("KVM: PPC: Book3S PR: Add emulation for treclaim.") Reviewed-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_emulate.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index fdbc695038dc..05cac5ea79c5 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -138,6 +138,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) { unsigned long guest_msr = kvmppc_get_msr(vcpu); int fc_val = ra_val ? 
ra_val : 1; + uint64_t texasr; /* CR0 = 0 | MSR[TS] | 0 */ vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | @@ -145,25 +146,26 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) << CR0_SHIFT); preempt_disable(); + tm_enable(); + texasr = mfspr(SPRN_TEXASR); kvmppc_save_tm_pr(vcpu); kvmppc_copyfrom_vcpu_tm(vcpu); - tm_enable(); - vcpu->arch.texasr = mfspr(SPRN_TEXASR); /* failure recording depends on Failure Summary bit */ - if (!(vcpu->arch.texasr & TEXASR_FS)) { - vcpu->arch.texasr &= ~TEXASR_FC; - vcpu->arch.texasr |= ((u64)fc_val << TEXASR_FC_LG); + if (!(texasr & TEXASR_FS)) { + texasr &= ~TEXASR_FC; + texasr |= ((u64)fc_val << TEXASR_FC_LG) | TEXASR_FS; - vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV); + texasr &= ~(TEXASR_PR | TEXASR_HV); if (kvmppc_get_msr(vcpu) & MSR_PR) - vcpu->arch.texasr |= TEXASR_PR; + texasr |= TEXASR_PR; if (kvmppc_get_msr(vcpu) & MSR_HV) - vcpu->arch.texasr |= TEXASR_HV; + texasr |= TEXASR_HV; + vcpu->arch.texasr = texasr; vcpu->arch.tfiar = kvmppc_get_pc(vcpu); - mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TEXASR, texasr); mtspr(SPRN_TFIAR, vcpu->arch.tfiar); } tm_disable(); From 4f169d21181faad87a6cdb288742e08c808ec0ef Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 7 Jun 2018 18:07:06 +1000 Subject: [PATCH 59/63] KVM: PPC: Book3S PR: Don't let PAPR guest set MSR hypervisor bit PAPR guests run in supervisor mode and should not be able to set the MSR HV (hypervisor mode) bit or clear the ME (machine check enable) bit by mtmsrd or any other means. To enforce this, we force MSR_HV off and MSR_ME on in kvmppc_set_msr_pr. Without this, the guest can appear to be in hypervisor mode to itself and to userspace. This has been observed to cause a crash in QEMU when it tries to deliver a system reset interrupt to the guest. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e8036ddeded1..5c99b84e0856 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -456,6 +456,10 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) { ulong old_msr; + /* For PAPR guest, make sure MSR reflects guest mode */ + if (vcpu->arch.papr_enabled) + msr = (msr & ~MSR_HV) | MSR_ME; + #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif From db96a04a86c73817b4584aa4fa2a3f60a9aa3c52 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 7 Jun 2018 18:08:02 +1000 Subject: [PATCH 60/63] KVM: PPC: Book3S PR: Enable use on POWER9 bare-metal hosts in HPT mode It turns out that PR KVM has no dependency on the format of HPTEs, because it uses functions pointed to by mmu_hash_ops which do all the formatting and interpretation of HPTEs. Thus we can allow PR KVM to load on POWER9 bare-metal hosts as long as they are running in HPT mode. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5c99b84e0856..c3b8006f0eac 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -2041,13 +2041,9 @@ static int kvmppc_core_check_processor_compat_pr(void) * PR KVM can work on POWER9 inside a guest partition * running in HPT mode. 
It can't work if we are using * radix translation (because radix provides no way for - * a process to have unique translations in quadrant 3) - * or in a bare-metal HPT-mode host (because POWER9 - * uses a modified HPTE format which the PR KVM code - * has not been adapted to use). + * a process to have unique translations in quadrant 3). */ - if (cpu_has_feature(CPU_FTR_ARCH_300) && - (radix_enabled() || cpu_has_feature(CPU_FTR_HVMODE))) + if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) return -EIO; return 0; }

From f61e0d3cc4aee194014074471658a5a037e311ce Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Fri, 8 Jun 2018 01:40:03 -0400 Subject: [PATCH 61/63] KVM: PPC: Book3S PR: Fix failure status setting in tabort. emulation

tabort. performs transaction failure recording, and that recording depends on the TEXASR FS bit. Currently the TEXASR FS bit is read after tabort. has executed, by which time tabort. itself has already updated it. This patch corrects this behavior by reading the TEXASR value before executing tabort. Also, tabort. does not immediately lead to transaction failure handling in suspended state, so this patch removes the mtspr writes to the TEXASR/TFIAR registers to avoid a TM Bad Thing exception.

Fixes: 26798f88d58d ("KVM: PPC: Book3S PR: Add emulation for tabort. in privileged state") Signed-off-by: Simon Guo Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_emulate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 05cac5ea79c5..36b11c5a0dbb 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -212,9 +212,11 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) * present. */ unsigned long guest_msr = kvmppc_get_msr(vcpu); + uint64_t org_texasr; preempt_disable(); tm_enable(); + org_texasr = mfspr(SPRN_TEXASR); tm_abort(ra_val); /* CR0 = 0 | MSR[TS] | 0 */ @@ -227,7 +229,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) * and tabort will be treated as nops in non-transactional * state. */ - if (!(vcpu->arch.texasr & TEXASR_FS) && + if (!(org_texasr & TEXASR_FS) && MSR_TM_ACTIVE(guest_msr)) { vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV); if (guest_msr & MSR_PR) @@ -237,8 +239,6 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) vcpu->arch.texasr |= TEXASR_HV; vcpu->arch.tfiar = kvmppc_get_pc(vcpu); - mtspr(SPRN_TEXASR, vcpu->arch.texasr); - mtspr(SPRN_TFIAR, vcpu->arch.tfiar); } tm_disable(); preempt_enable();

From 273ba45796c14b4a2b669098f13d576b9e233dd8 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 11 Jun 2018 14:12:10 -0300 Subject: [PATCH 62/63] KVM: x86: fix typo at kvm_arch_hardware_setup comment

Fix typo in sentence about min value calculation.

Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bcecc325e7e..0046aa70205a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8567,7 +8567,7 @@ int kvm_arch_hardware_setup(void) /* * Make sure the user can only configure tsc_khz values that * fit into a signed integer. - * A min value is not calculated needed because it will always + * A min value is not calculated because it will always * be 1 on all machines.
*/ u64 max = min(0x7fffffffULL, From 1f008e114b1ba17e1d73e61149070c502174bb89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 May 2018 17:36:17 +0200 Subject: [PATCH 63/63] KVM: x86: VMX: redo fix for link error without CONFIG_HYPERV Arnd had sent this patch to the KVM mailing list, but it slipped through the cracks of maintainers hand-off, and therefore wasn't included in the pull request. The same issue had been fixed by Linus in commit dbee3d0 ("KVM: x86: VMX: fix build without hyper-v", 2018-06-12) as a self-described "quick-and-hacky build fix". However, checking the compile-time configuration symbol with IS_ENABLED is cleaner and it is enough to avoid the link error, so switch to Arnd's solution. Signed-off-by: Arnd Bergmann [Rewritten commit message. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d0dd35d582da..559a12b6184d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4429,16 +4429,14 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs) && + if (IS_ENABLED(CONFIG_HYPERV) && + static_branch_unlikely(&enable_evmcs) && (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { struct hv_enlightened_vmcs *evmcs = (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; evmcs->hv_enlightenments_control.msr_bitmap = 1; } -#endif - } return 0;
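More generally, the IS_ENABLED() form keeps the code visible to the compiler in every configuration while still discarding it, along with its symbol references, when the option is off, which is why it avoids both the #if/#endif clutter and the link error. A generic sketch of the pattern follows; struct foo, hyperv_specific_condition() and enable_hyperv_path() are placeholders, not real kernel interfaces.

/*
 * Generic sketch of the IS_ENABLED() pattern; foo, hyperv_specific_condition()
 * and enable_hyperv_path() are hypothetical placeholders.
 */
#include <linux/kconfig.h>
#include <linux/types.h>

struct foo;
bool hyperv_specific_condition(void);
void enable_hyperv_path(struct foo *f);

void setup_feature(struct foo *f)
{
	/*
	 * IS_ENABLED(CONFIG_HYPERV) is a compile-time constant, so this branch
	 * is type-checked in every configuration but compiled out (together
	 * with its calls to unresolved symbols) when CONFIG_HYPERV is not set.
	 */
	if (IS_ENABLED(CONFIG_HYPERV) && hyperv_specific_condition())
		enable_hyperv_path(f);
}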