diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt index eb06beb75960..8d5830eab26a 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt @@ -33,6 +33,10 @@ Groups: request the initialization of the ITS, no additional parameter in kvm_device_attr.addr. + KVM_DEV_ARM_ITS_CTRL_RESET + reset the ITS, no additional parameter in kvm_device_attr.addr. + See "ITS Reset State" section. + KVM_DEV_ARM_ITS_SAVE_TABLES save the ITS table data into guest RAM, at the location provisioned by the guest in corresponding registers/table entries. @@ -157,3 +161,19 @@ Then vcpus can be started. - pINTID is the physical LPI ID; if zero, it means the entry is not valid and other fields are not meaningful. - ICID is the collection ID + + ITS Reset State: + ---------------- + +RESET returns the ITS to the same state that it was when first created and +initialized. When the RESET command returns, the following things are +guaranteed: + +- The ITS is not enabled and quiescent + GITS_CTLR.Enabled = 0 .Quiescent=1 +- There is no internally cached state +- No collection or device table are used + GITS_BASER.Valid = 0 +- GITS_CBASER = 0, GITS_CREADR = 0, GITS_CWRITER = 0 +- The ABI version is unchanged and remains the one set when the ITS + device was first created. diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 14d68a4d826f..36dd2962a42d 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -68,6 +68,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); + extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __init_stage2_translation(void); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 98089ffd91bb..3d22eb87f919 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -25,7 +25,22 @@ #include #include +/* arm64 compatibility macros */ +#define COMPAT_PSR_MODE_ABT ABT_MODE +#define COMPAT_PSR_MODE_UND UND_MODE +#define COMPAT_PSR_T_BIT PSR_T_BIT +#define COMPAT_PSR_I_BIT PSR_I_BIT +#define COMPAT_PSR_A_BIT PSR_A_BIT +#define COMPAT_PSR_E_BIT PSR_E_BIT +#define COMPAT_PSR_IT_MASK PSR_IT_MASK + unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); + +static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num) +{ + return vcpu_reg(vcpu, reg_num); +} + unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, @@ -42,10 +57,25 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); -void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_undef32(struct kvm_vcpu *vcpu); +void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_vabt(struct kvm_vcpu *vcpu); -void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); + +static inline void kvm_inject_undefined(struct kvm_vcpu *vcpu) +{ + kvm_inject_undef32(vcpu); +} + +static inline void 
kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + kvm_inject_dabt32(vcpu, addr); +} + +static inline void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + kvm_inject_pabt32(vcpu, addr); +} static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) { @@ -203,7 +233,7 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) { - switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: case FSC_SEA_TTW0: case FSC_SEA_TTW1: diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 14b5903f0224..ab20ffa8b9e7 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -98,8 +98,8 @@ #define cntvoff_el2 CNTVOFF #define cnthctl_el2 CNTHCTL -void __timer_save_state(struct kvm_vcpu *vcpu); -void __timer_restore_state(struct kvm_vcpu *vcpu); +void __timer_enable_traps(struct kvm_vcpu *vcpu); +void __timer_disable_traps(struct kvm_vcpu *vcpu); void __vgic_v2_save_state(struct kvm_vcpu *vcpu); void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 5db2d4c6a55f..b56895593c84 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -151,6 +151,12 @@ struct kvm_arch_memory_slot { (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) +/* PL1 Physical Timer Registers */ +#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1) +#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14) +#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14) + +/* Virtual Timer Registers */ #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) #define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) #define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) @@ -215,6 +221,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 +#define KVM_DEV_ARM_ITS_CTRL_RESET 4 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index 0064b86a2c87..cdff963f133a 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -165,145 +165,6 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) * Inject exceptions into the guest */ -static u32 exc_vector_base(struct kvm_vcpu *vcpu) -{ - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - u32 vbar = vcpu_cp15(vcpu, c12_VBAR); - - if (sctlr & SCTLR_V) - return 0xffff0000; - else /* always have security exceptions */ - return vbar; -} - -/* - * Switch to an exception mode, updating both CPSR and SPSR. Follow - * the logic described in AArch32.EnterMode() from the ARMv8 ARM. 
- */ -static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) -{ - unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; - - switch (mode) { - case FIQ_MODE: - *vcpu_cpsr(vcpu) |= PSR_F_BIT; - /* Fall through */ - case ABT_MODE: - case IRQ_MODE: - *vcpu_cpsr(vcpu) |= PSR_A_BIT; - /* Fall through */ - default: - *vcpu_cpsr(vcpu) |= PSR_I_BIT; - } - - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to the mode banked copies */ - *vcpu_spsr(vcpu) = cpsr; -} - -/** - * kvm_inject_undefined - inject an undefined exception into the guest - * @vcpu: The VCPU to receive the undefined exception - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - * - * Modelled after TakeUndefInstrException() pseudocode. - */ -void kvm_inject_undefined(struct kvm_vcpu *vcpu) -{ - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_thumb = (cpsr & PSR_T_BIT); - u32 vect_offset = 4; - u32 return_offset = (is_thumb) ? 2 : 4; - - kvm_update_psr(vcpu, UND_MODE); - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset; - - /* Branch to exception vector */ - *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. - */ -static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) -{ - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_thumb = (cpsr & PSR_T_BIT); - u32 vect_offset; - u32 return_offset = (is_thumb) ? 4 : 0; - bool is_lpae; - - kvm_update_psr(vcpu, ABT_MODE); - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - if (is_pabt) - vect_offset = 12; - else - vect_offset = 16; - - /* Branch to exception vector */ - *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; - - if (is_pabt) { - /* Set IFAR and IFSR */ - vcpu_cp15(vcpu, c6_IFAR) = addr; - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - /* Always give debug fault for now - should give guest a clue */ - if (is_lpae) - vcpu_cp15(vcpu, c5_IFSR) = 1 << 9 | 0x22; - else - vcpu_cp15(vcpu, c5_IFSR) = 2; - } else { /* !iabt */ - /* Set DFAR and DFSR */ - vcpu_cp15(vcpu, c6_DFAR) = addr; - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - /* Always give debug fault for now - should give guest a clue */ - if (is_lpae) - vcpu_cp15(vcpu, c5_DFSR) = 1 << 9 | 0x22; - else - vcpu_cp15(vcpu, c5_DFSR) = 2; - } - -} - -/** - * kvm_inject_dabt - inject a data abort into the guest - * @vcpu: The VCPU to receive the undefined exception - * @addr: The address to report in the DFAR - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - */ -void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt(vcpu, false, addr); -} - -/** - * kvm_inject_pabt - inject a prefetch abort into the guest - * @vcpu: The VCPU to receive the undefined exception - * @addr: The address to report in the DFAR - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. 
- */ -void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt(vcpu, true, addr); -} - /** * kvm_inject_vabt - inject an async abort / SError into the guest * @vcpu: The VCPU to receive the exception diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index ebd2dd46adf7..330c9ce34ba5 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -174,7 +174,7 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) __activate_vm(vcpu); __vgic_restore_state(vcpu); - __timer_restore_state(vcpu); + __timer_enable_traps(vcpu); __sysreg_restore_state(guest_ctxt); __banked_restore_state(guest_ctxt); @@ -191,7 +191,8 @@ again: __banked_save_state(guest_ctxt); __sysreg_save_state(guest_ctxt); - __timer_save_state(vcpu); + __timer_disable_traps(vcpu); + __vgic_save_state(vcpu); __deactivate_traps(vcpu); @@ -237,7 +238,7 @@ void __hyp_text __noreturn __hyp_panic(int cause) vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); - __timer_save_state(vcpu); + __timer_disable_traps(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); __banked_restore_state(host_ctxt); diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index a652ce0a5cb2..04275de614db 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -52,6 +52,7 @@ struct arch_timer_erratum_workaround { const char *desc; u32 (*read_cntp_tval_el0)(void); u32 (*read_cntv_tval_el0)(void); + u64 (*read_cntpct_el0)(void); u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); int (*set_next_event_virt)(unsigned long, struct clock_event_device *); @@ -148,11 +149,8 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) static inline u64 arch_counter_get_cntpct(void) { - /* - * AArch64 kernel and user space mandate the use of CNTVCT. 
- */ - BUG(); - return 0; + isb(); + return arch_timer_reg_read_stable(cntpct_el0); } static inline u64 arch_counter_get_cntvct(void) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 26a64d0f9ab9..ab4d0a926043 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -55,6 +55,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); + extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index e5df3fce0008..5f28dfa14cee 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -41,6 +41,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_undef32(struct kvm_vcpu *vcpu); +void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { @@ -237,7 +240,7 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) { - switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: case FSC_SEA_TTW0: case FSC_SEA_TTW1: diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 4572a9b560fa..08d3bb66c8b7 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -129,8 +129,8 @@ void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); -void __timer_save_state(struct kvm_vcpu *vcpu); -void __timer_restore_state(struct kvm_vcpu *vcpu); +void __timer_enable_traps(struct kvm_vcpu *vcpu); +void __timer_disable_traps(struct kvm_vcpu *vcpu); void __sysreg_save_host_state(struct kvm_cpu_context *ctxt); void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt); diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index 81a076eb37fa..9ad60bae5c8d 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -22,7 +22,7 @@ * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() arch_counter_get_cntvct() +#define get_cycles() arch_timer_read_counter() #include diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 9f3ca24bbcc6..37ca7394549c 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -195,6 +195,12 @@ struct kvm_arch_memory_slot { #define ARM64_SYS_REG(...) 
(__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) +/* Physical Timer EL0 Registers */ +#define KVM_REG_ARM_PTIMER_CTL ARM64_SYS_REG(3, 3, 14, 2, 1) +#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) +#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) + +/* EL0 Virtual Timer Registers */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) @@ -227,6 +233,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 +#define KVM_DEV_ARM_ITS_CTRL_RESET 4 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 945e79c641c4..4994f4bdaca5 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -298,7 +298,7 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) __activate_vm(vcpu); __vgic_restore_state(vcpu); - __timer_restore_state(vcpu); + __timer_enable_traps(vcpu); /* * We must restore the 32-bit state before the sysregs, thanks @@ -368,7 +368,7 @@ again: __sysreg_save_guest_state(guest_ctxt); __sysreg32_save_state(vcpu); - __timer_save_state(vcpu); + __timer_disable_traps(vcpu); __vgic_save_state(vcpu); __deactivate_traps(vcpu); @@ -436,7 +436,7 @@ void __hyp_text __noreturn __hyp_panic(void) vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); - __timer_save_state(vcpu); + __timer_disable_traps(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); __sysreg_restore_host_state(host_ctxt); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index da6a8cfa54a0..8ecbcb40e317 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -33,74 +33,6 @@ #define LOWER_EL_AArch64_VECTOR 0x400 #define LOWER_EL_AArch32_VECTOR 0x600 -static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) -{ - unsigned long cpsr; - unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); - u32 return_offset = (is_thumb) ? 4 : 0; - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - cpsr = mode | COMPAT_PSR_I_BIT; - - if (sctlr & (1 << 30)) - cpsr |= COMPAT_PSR_T_BIT; - if (sctlr & (1 << 25)) - cpsr |= COMPAT_PSR_E_BIT; - - *vcpu_cpsr(vcpu) = cpsr; - - /* Note: These now point to the banked copies */ - *vcpu_spsr(vcpu) = new_spsr_value; - *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - /* Branch to exception vector */ - if (sctlr & (1 << 13)) - vect_offset += 0xffff0000; - else /* always have security exceptions */ - vect_offset += vcpu_cp15(vcpu, c12_VBAR); - - *vcpu_pc(vcpu) = vect_offset; -} - -static void inject_undef32(struct kvm_vcpu *vcpu) -{ - prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. 
- */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) -{ - u32 vect_offset; - u32 *far, *fsr; - bool is_lpae; - - if (is_pabt) { - vect_offset = 12; - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); - } else { /* !iabt */ - vect_offset = 16; - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) - *fsr = 1 << 9 | 0x34; - else - *fsr = 0x14; -} - enum exception_type { except_type_sync = 0, except_type_irq = 0x80, @@ -197,7 +129,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) - inject_abt32(vcpu, false, addr); + kvm_inject_dabt32(vcpu, addr); else inject_abt64(vcpu, false, addr); } @@ -213,7 +145,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) - inject_abt32(vcpu, true, addr); + kvm_inject_pabt32(vcpu, addr); else inject_abt64(vcpu, true, addr); } @@ -227,7 +159,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) - inject_undef32(vcpu); + kvm_inject_undef32(vcpu); else inject_undef64(vcpu); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2e070d3baf9f..bb0e41b3154e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -841,13 +841,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); u64 now = kvm_phys_timer_read(); + u64 cval; - if (p->is_write) - ptimer->cnt_cval = p->regval + now; - else - p->regval = ptimer->cnt_cval - now; + if (p->is_write) { + kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, + p->regval + now); + } else { + cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); + p->regval = cval - now; + } return true; } @@ -856,24 +859,10 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - - if (p->is_write) { - /* ISTATUS bit is read-only */ - ptimer->cnt_ctl = p->regval & ~ARCH_TIMER_CTRL_IT_STAT; - } else { - u64 now = kvm_phys_timer_read(); - - p->regval = ptimer->cnt_ctl; - /* - * Set ISTATUS bit if it's expired. - * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is - * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit - * regardless of ENABLE bit for our implementation convenience. 
- */ - if (ptimer->cnt_cval <= now) - p->regval |= ARCH_TIMER_CTRL_IT_STAT; - } + if (p->is_write) + kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval); + else + p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); return true; } @@ -882,12 +871,10 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - if (p->is_write) - ptimer->cnt_cval = p->regval; + kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval); else - p->regval = ptimer->cnt_cval; + p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); return true; } diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index fd4b7f684bd0..061476e92db7 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -158,6 +158,7 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, * if we don't have the cp15 accessors we won't have a problem. */ u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; +EXPORT_SYMBOL_GPL(arch_timer_read_counter); static u64 arch_counter_read(struct clocksource *cs) { @@ -217,6 +218,11 @@ static u32 notrace fsl_a008585_read_cntv_tval_el0(void) return __fsl_a008585_read_reg(cntv_tval_el0); } +static u64 notrace fsl_a008585_read_cntpct_el0(void) +{ + return __fsl_a008585_read_reg(cntpct_el0); +} + static u64 notrace fsl_a008585_read_cntvct_el0(void) { return __fsl_a008585_read_reg(cntvct_el0); @@ -258,6 +264,11 @@ static u32 notrace hisi_161010101_read_cntv_tval_el0(void) return __hisi_161010101_read_reg(cntv_tval_el0); } +static u64 notrace hisi_161010101_read_cntpct_el0(void) +{ + return __hisi_161010101_read_reg(cntpct_el0); +} + static u64 notrace hisi_161010101_read_cntvct_el0(void) { return __hisi_161010101_read_reg(cntvct_el0); @@ -288,6 +299,15 @@ static struct ate_acpi_oem_info hisi_161010101_oem_info[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_858921 +static u64 notrace arm64_858921_read_cntpct_el0(void) +{ + u64 old, new; + + old = read_sysreg(cntpct_el0); + new = read_sysreg(cntpct_el0); + return (((old ^ new) >> 32) & 1) ? 
old : new; +} + static u64 notrace arm64_858921_read_cntvct_el0(void) { u64 old, new; @@ -310,16 +330,19 @@ static void erratum_set_next_event_tval_generic(const int access, unsigned long struct clock_event_device *clk) { unsigned long ctrl; - u64 cval = evt + arch_counter_get_cntvct(); + u64 cval; ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ctrl |= ARCH_TIMER_CTRL_ENABLE; ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; - if (access == ARCH_TIMER_PHYS_ACCESS) + if (access == ARCH_TIMER_PHYS_ACCESS) { + cval = evt + arch_counter_get_cntpct(); write_sysreg(cval, cntp_cval_el0); - else + } else { + cval = evt + arch_counter_get_cntvct(); write_sysreg(cval, cntv_cval_el0); + } arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } @@ -346,6 +369,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .desc = "Freescale erratum a005858", .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0, .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0, + .read_cntpct_el0 = fsl_a008585_read_cntpct_el0, .read_cntvct_el0 = fsl_a008585_read_cntvct_el0, .set_next_event_phys = erratum_set_next_event_tval_phys, .set_next_event_virt = erratum_set_next_event_tval_virt, @@ -358,6 +382,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .desc = "HiSilicon erratum 161010101", .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, + .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, .set_next_event_phys = erratum_set_next_event_tval_phys, .set_next_event_virt = erratum_set_next_event_tval_virt, @@ -368,6 +393,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .desc = "HiSilicon erratum 161010101", .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, + .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, .set_next_event_phys = erratum_set_next_event_tval_phys, .set_next_event_virt = erratum_set_next_event_tval_virt, @@ -378,6 +404,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .match_type = ate_match_local_cap_id, .id = (void *)ARM64_WORKAROUND_858921, .desc = "ARM erratum 858921", + .read_cntpct_el0 = arm64_858921_read_cntpct_el0, .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif @@ -890,7 +917,7 @@ static void __init arch_counter_register(unsigned type) /* Register the CP15 based counter if we have one */ if (type & ARCH_TIMER_TYPE_CP15) { - if (IS_ENABLED(CONFIG_ARM64) || + if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) arch_timer_read_counter = arch_counter_get_cntvct; else diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b5df99c6f680..854334a6f225 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1228,7 +1228,9 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare goto out_unmap_rdist; gic_populate_ppi_partitions(node); - gic_of_setup_kvm_info(node); + + if (static_key_true(&supports_deactivate)) + gic_of_setup_kvm_info(node); return 0; out_unmap_rdist: @@ -1517,7 +1519,9 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) goto out_fwhandle_free; acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); - gic_acpi_setup_kvm_info(); + + if (static_key_true(&supports_deactivate)) + 
gic_acpi_setup_kvm_info(); return 0; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 651d726e8b12..cd9371b749c2 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1367,7 +1367,8 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) if (ret) return; - gic_set_kvm_info(&gic_v2_kvm_info); + if (static_key_true(&supports_deactivate)) + gic_set_kvm_info(&gic_v2_kvm_info); } int __init @@ -1599,7 +1600,8 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header, if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) gicv2m_init(NULL, gic_data[0].domain); - gic_acpi_setup_kvm_info(); + if (static_key_true(&supports_deactivate)) + gic_acpi_setup_kvm_info(); return 0; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f0053f884b4a..01ee473517e2 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -31,8 +31,15 @@ struct arch_timer_context { /* Timer IRQ */ struct kvm_irq_level irq; - /* Active IRQ state caching */ - bool active_cleared_last; + /* + * We have multiple paths which can save/restore the timer state + * onto the hardware, so we need some way of keeping track of + * where the latest state is. + * + * loaded == true: State is loaded on the hardware registers. + * loaded == false: State is stored in memory. + */ + bool loaded; /* Virtual offset */ u64 cntvoff; @@ -43,13 +50,13 @@ struct arch_timer_cpu { struct arch_timer_context ptimer; /* Background timer used when the guest is not running */ - struct hrtimer timer; + struct hrtimer bg_timer; /* Work queued with the above timer expires */ struct work_struct expired; - /* Background timer active */ - bool armed; + /* Physical timer emulation */ + struct hrtimer phys_timer; /* Is the timer enabled */ bool enabled; @@ -59,7 +66,6 @@ int kvm_timer_hyp_init(void); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); void kvm_timer_update_run(struct kvm_vcpu *vcpu); @@ -72,16 +78,22 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); +bool kvm_timer_is_pending(struct kvm_vcpu *vcpu); + void kvm_timer_schedule(struct kvm_vcpu *vcpu); void kvm_timer_unschedule(struct kvm_vcpu *vcpu); u64 kvm_phys_timer_read(void); +void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) + +void enable_el1_phys_timer_access(void); +void disable_el1_phys_timer_access(void); + #endif diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 79c7c357804b..8bc479fa37e6 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -25,11 +25,6 @@ #include #include -#ifndef CONFIG_ARM64 -#define COMPAT_PSR_T_BIT PSR_T_BIT -#define COMPAT_PSR_IT_MASK PSR_IT_MASK -#endif - /* * stolen from arch/arm/kernel/opcodes.c * @@ -150,3 +145,95 @@ void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) *vcpu_pc(vcpu) += 4; kvm_adjust_itstate(vcpu); } + 
+/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr & (1 << 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr & (1 << 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) + *fsr = 1 << 9 | 0x34; + else + *fsr = 0x14; +} + +void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, false, addr); +} + +void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, true, addr); +} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 8e89d63005c7..4db54ff08d9e 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -46,49 +46,68 @@ static const struct kvm_irq_level default_vtimer_irq = { .level = 1, }; -void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) -{ - vcpu_vtimer(vcpu)->active_cleared_last = false; -} +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx); +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); } -static bool timer_is_armed(struct arch_timer_cpu *timer) +static void soft_timer_start(struct hrtimer *hrt, u64 ns) { - return timer->armed; -} - -/* timer_arm: as in "arm the timer", not as in ARM the company */ -static void timer_arm(struct arch_timer_cpu *timer, u64 ns) -{ - timer->armed = true; - hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), + hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), HRTIMER_MODE_ABS); } -static void timer_disarm(struct arch_timer_cpu *timer) +static void 
soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work) { - if (timer_is_armed(timer)) { - hrtimer_cancel(&timer->timer); - cancel_work_sync(&timer->expired); - timer->armed = false; - } + hrtimer_cancel(hrt); + if (work) + cancel_work_sync(work); +} + +static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * When using a userspace irqchip with the architected timers, we must + * prevent continuously exiting from the guest, and therefore mask the + * physical interrupt by disabling it on the host interrupt controller + * when the virtual level is high, such that the guest can make + * forward progress. Once we detect the output level being + * de-asserted, we unmask the interrupt again so that we exit from the + * guest when the timer fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, 0); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + struct arch_timer_context *vtimer; + + if (!vcpu) { + pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); + return IRQ_NONE; + } + vtimer = vcpu_vtimer(vcpu); + + if (!vtimer->irq.level) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + if (kvm_timer_irq_can_fire(vtimer)) + kvm_timer_update_irq(vcpu, true, vtimer); + } + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_vtimer_update_mask_user(vcpu); - /* - * We disable the timer in the world switch and let it be - * handled by kvm_timer_sync_hwstate(). Getting a timer - * interrupt at this point is a sure sign of some major - * breakage. - */ - pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); return IRQ_HANDLED; } @@ -158,13 +177,13 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) return min(min_virt, min_phys); } -static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) +static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; struct kvm_vcpu *vcpu; u64 ns; - timer = container_of(hrt, struct arch_timer_cpu, timer); + timer = container_of(hrt, struct arch_timer_cpu, bg_timer); vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); /* @@ -182,7 +201,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } -bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) +static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) +{ + struct arch_timer_context *ptimer; + struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + + timer = container_of(hrt, struct arch_timer_cpu, phys_timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + ptimer = vcpu_ptimer(vcpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If not ready, schedule for a later time. 
+ */ + ns = kvm_timer_compute_delta(ptimer); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + + kvm_timer_update_irq(vcpu, true, ptimer); + return HRTIMER_NORESTART; +} + +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { u64 cval, now; @@ -195,6 +240,25 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) return cval <= now; } +bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + if (vtimer->irq.level || ptimer->irq.level) + return true; + + /* + * When this is called from withing the wait loop of kvm_vcpu_block(), + * the software view of the timer state is up to date (timer->loaded + * is false), and so we can simply check if the timer should fire now. + */ + if (!vtimer->loaded && kvm_timer_should_fire(vtimer)) + return true; + + return kvm_timer_should_fire(ptimer); +} + /* * Reflect the timer output level into the kvm_run structure */ @@ -218,7 +282,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, { int ret; - timer_ctx->active_cleared_last = false; timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); @@ -232,9 +295,29 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, } } +/* Schedule the background timer for the emulated timer. */ +static void phys_timer_emulate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + /* + * If the timer can fire now we have just raised the IRQ line and we + * don't need to have a soft timer scheduled for the future. If the + * timer cannot fire at all, then we also don't need a soft timer. + */ + if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { + soft_timer_cancel(&timer->phys_timer, NULL); + return; + } + + soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); +} + /* - * Check if there was a change in the timer state (should we raise or lower - * the line level to the GIC). + * Check if there was a change in the timer state, so that we should either + * raise or lower the line level to the GIC or schedule a background timer to + * emulate the physical timer. */ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) { @@ -242,12 +325,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - /* - * If userspace modified the timer registers via SET_ONE_REG before - * the vgic was initialized, we mustn't set the vtimer->irq.level value - * because the guest would never see the interrupt. Instead wait - * until we call this function from kvm_timer_flush_hwstate. - */ if (unlikely(!timer->enabled)) return; @@ -256,22 +333,32 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); + + phys_timer_emulate(vcpu); } -/* Schedule the background timer for the emulated timer. 
*/ -static void kvm_timer_emulate(struct kvm_vcpu *vcpu, - struct arch_timer_context *timer_ctx) +static void vtimer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + unsigned long flags; - if (kvm_timer_should_fire(timer_ctx)) - return; + local_irq_save(flags); - if (!kvm_timer_irq_can_fire(timer_ctx)) - return; + if (!vtimer->loaded) + goto out; - /* The timer has not yet expired, schedule a background timer */ - timer_arm(timer, kvm_timer_compute_delta(timer_ctx)); + if (timer->enabled) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); + } + + /* Disable the virtual timer */ + write_sysreg_el0(0, cntv_ctl); + + vtimer->loaded = false; +out: + local_irq_restore(flags); } /* @@ -285,7 +372,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - BUG_ON(timer_is_armed(timer)); + vtimer_save_state(vcpu); /* * No need to schedule a background timer if any guest timer has @@ -306,70 +393,97 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) * The guest timers have not yet expired, schedule a background timer. * Set the earliest expiration time among the guest timers. */ - timer_arm(timer, kvm_timer_earliest_exp(vcpu)); + soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); +} + +static void vtimer_restore_state(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + unsigned long flags; + + local_irq_save(flags); + + if (vtimer->loaded) + goto out; + + if (timer->enabled) { + write_sysreg_el0(vtimer->cnt_cval, cntv_cval); + isb(); + write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); + } + + vtimer->loaded = true; +out: + local_irq_restore(flags); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer_disarm(timer); + + vtimer_restore_state(vcpu); + + soft_timer_cancel(&timer->bg_timer, &timer->expired); } -static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) +static void set_cntvoff(u64 cntvoff) +{ + u32 low = lower_32_bits(cntvoff); + u32 high = upper_32_bits(cntvoff); + + /* + * Since kvm_call_hyp doesn't fully support the ARM PCS especially on + * 32-bit systems, but rather passes register by register shifted one + * place (we put the function address in r0/x0), we cannot simply pass + * a 64-bit value as an argument, but have to split the value in two + * 32-bit halves. + */ + kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); +} + +static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; - /* - * If we enter the guest with the virtual input level to the VGIC - * asserted, then we have already told the VGIC what we need to, and - * we don't need to exit from the guest until the guest deactivates - * the already injected interrupt, so therefore we should set the - * hardware active state to prevent unnecessary exits from the guest. - * - * Also, if we enter the guest with the virtual timer interrupt active, - * then it must be active on the physical distributor, because we set - * the HW bit and the guest must be able to deactivate the virtual and - * physical interrupt at the same time. 
- * - * Conversely, if the virtual input level is deasserted and the virtual - * interrupt is not active, then always clear the hardware active state - * to ensure that hardware interrupts from the timer triggers a guest - * exit. - */ phys_active = vtimer->irq.level || - kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); - - /* - * We want to avoid hitting the (re)distributor as much as - * possible, as this is a potentially expensive MMIO access - * (not to mention locks in the irq layer), and a solution for - * this is to cache the "active" state in memory. - * - * Things to consider: we cannot cache an "active set" state, - * because the HW can change this behind our back (it becomes - * "clear" in the HW). We must then restrict the caching to - * the "clear" state. - * - * The cache is invalidated on: - * - vcpu put, indicating that the HW cannot be trusted to be - * in a sane state on the next vcpu load, - * - any change in the interrupt state - * - * Usage conditions: - * - cached value is "active clear" - * - value to be programmed is "active clear" - */ - if (vtimer->active_cleared_last && !phys_active) - return; + kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); ret = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, phys_active); WARN_ON(ret); +} - vtimer->active_cleared_last = !phys_active; +static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu) +{ + kvm_vtimer_update_mask_user(vcpu); +} + +void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (unlikely(!timer->enabled)) + return; + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_timer_vcpu_load_user(vcpu); + else + kvm_timer_vcpu_load_vgic(vcpu); + + set_cntvoff(vtimer->cntvoff); + + vtimer_restore_state(vcpu); + + if (has_vhe()) + disable_el1_phys_timer_access(); + + /* Set the background timer for the physical timer emulation. */ + phys_timer_emulate(vcpu); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -389,48 +503,60 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) ptimer->irq.level != plevel; } -static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) -{ - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - - /* - * To prevent continuously exiting from the guest, we mask the - * physical interrupt such that the guest can make forward progress. - * Once we detect the output level being deasserted, we unmask the - * interrupt again so that we exit from the guest when the timer - * fires. - */ - if (vtimer->irq.level) - disable_percpu_irq(host_vtimer_irq); - else - enable_percpu_irq(host_vtimer_irq, 0); -} - -/** - * kvm_timer_flush_hwstate - prepare timers before running the vcpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the host, - * and inject an interrupt if that was the case, making sure the timer is - * masked or disabled on the host so that we keep executing. Also schedule a - * software timer for the physical timer if it is enabled. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; if (unlikely(!timer->enabled)) return; - kvm_timer_update_state(vcpu); + if (has_vhe()) + enable_el1_phys_timer_access(); - /* Set the background timer for the physical timer emulation. 
*/ - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); + vtimer_save_state(vcpu); - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - kvm_timer_flush_hwstate_user(vcpu); - else - kvm_timer_flush_hwstate_vgic(vcpu); + /* + * Cancel the physical timer emulation, because the only case where we + * need it after a vcpu_put is in the context of a sleeping VCPU, and + * in that case we already factor in the deadline for the physical + * timer when scheduling the bg_timer. + * + * In any case, we re-schedule the hrtimer for the physical timer when + * coming back to the VCPU thread in kvm_timer_vcpu_load(). + */ + soft_timer_cancel(&timer->phys_timer, NULL); + + /* + * The kernel may decide to run userspace after calling vcpu_put, so + * we reset cntvoff to 0 to ensure a consistent read between user + * accesses to the virtual counter and kernel access to the physical + * counter. + */ + set_cntvoff(0); +} + +static void unmask_vtimer_irq(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { + kvm_vtimer_update_mask_user(vcpu); + return; + } + + /* + * If the guest disabled the timer without acking the interrupt, then + * we must make sure the physical and virtual active states are in + * sync by deactivating the physical interrupt, because otherwise we + * wouldn't see the next timer interrupt in the host. + */ + if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) { + int ret; + ret = irq_set_irqchip_state(host_vtimer_irq, + IRQCHIP_STATE_ACTIVE, + false); + WARN_ON(ret); + } } /** @@ -442,19 +568,21 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); /* - * This is to cancel the background timer for the physical timer - * emulation if it is set. + * If we entered the guest with the vtimer output asserted we have to + * check if the guest has modified the timer so that we should lower + * the line at this point. */ - timer_disarm(timer); - - /* - * The guest could have modified the timer registers or the timer - * could have expired, update the timer state. 
- */ - kvm_timer_update_state(vcpu); + if (vtimer->irq.level) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + unmask_vtimer_irq(vcpu); + } + } } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) @@ -505,8 +633,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vcpu_ptimer(vcpu)->cntvoff = 0; INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = kvm_timer_expire; + hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + timer->bg_timer.function = kvm_bg_timer_expire; + + hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + timer->phys_timer.function = kvm_phys_timer_expire; vtimer->irq.irq = default_vtimer_irq.irq; ptimer->irq.irq = default_ptimer_irq.irq; @@ -520,10 +651,11 @@ static void kvm_timer_init_interrupt(void *info) int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - vtimer->cnt_ctl = value; + vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; break; case KVM_REG_ARM_TIMER_CNT: update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); @@ -531,6 +663,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) case KVM_REG_ARM_TIMER_CVAL: vtimer->cnt_cval = value; break; + case KVM_REG_ARM_PTIMER_CTL: + ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; + break; + case KVM_REG_ARM_PTIMER_CVAL: + ptimer->cnt_cval = value; + break; + default: return -1; } @@ -539,17 +678,38 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) return 0; } +static u64 read_timer_ctl(struct arch_timer_context *timer) +{ + /* + * Set ISTATUS bit if it's expired. + * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is + * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit + * regardless of ENABLE bit for our implementation convenience. 
+ */ + if (!kvm_timer_compute_delta(timer)) + return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; + else + return timer->cnt_ctl; +} + u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) { + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - return vtimer->cnt_ctl; + return read_timer_ctl(vtimer); case KVM_REG_ARM_TIMER_CNT: return kvm_phys_timer_read() - vtimer->cntvoff; case KVM_REG_ARM_TIMER_CVAL: return vtimer->cnt_cval; + case KVM_REG_ARM_PTIMER_CTL: + return read_timer_ctl(ptimer); + case KVM_REG_ARM_PTIMER_CVAL: + return ptimer->cnt_cval; + case KVM_REG_ARM_PTIMER_CNT: + return kvm_phys_timer_read(); } return (u64)-1; } @@ -602,11 +762,20 @@ int kvm_timer_hyp_init(void) return err; } + err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, "kvm/arm/timer:starting", kvm_timer_starting_cpu, kvm_timer_dying_cpu); + return 0; +out_free_irq: + free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); return err; } @@ -615,7 +784,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - timer_disarm(timer); + soft_timer_cancel(&timer->bg_timer, &timer->expired); + soft_timer_cancel(&timer->phys_timer, NULL); kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } @@ -691,7 +861,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return ret; no_vgic: + preempt_disable(); timer->enabled = 1; + kvm_timer_vcpu_load_vgic(vcpu); + preempt_enable(); + return 0; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index b9f68e4add71..bc126fb99a3d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -307,8 +307,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_timer_should_fire(vcpu_vtimer(vcpu)) || - kvm_timer_should_fire(vcpu_ptimer(vcpu)); + return kvm_timer_is_pending(vcpu); } void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) @@ -354,18 +353,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); kvm_arm_set_running_vcpu(vcpu); - kvm_vgic_load(vcpu); + kvm_timer_vcpu_load(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); vcpu->cpu = -1; kvm_arm_set_running_vcpu(NULL); - kvm_timer_vcpu_put(vcpu); } static void vcpu_power_off(struct kvm_vcpu *vcpu) @@ -654,11 +653,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_flush_hwstate(vcpu); - kvm_timer_flush_hwstate(vcpu); - kvm_vgic_flush_hwstate(vcpu); - local_irq_disable(); + kvm_vgic_flush_hwstate(vcpu); + /* * If we have a singal pending, or need to notify a userspace * irqchip about timer or PMU level changes, then we exit (and @@ -683,10 +681,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; - local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); + local_irq_enable(); preempt_enable(); continue; } @@ -709,6 +707,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 
kvm_arm_clear_debug(vcpu); + /* + * We must sync the PMU state before the vgic state so + * that the vgic can properly sample the updated state of the + * interrupt line. + */ + kvm_pmu_sync_hwstate(vcpu); + + /* + * Sync the vgic state before syncing the timer state because + * the timer code needs to know if the virtual timer + * interrupts are active. + */ + kvm_vgic_sync_hwstate(vcpu); + + /* + * Sync the timer hardware state before enabling interrupts as + * we don't want vtimer interrupts to race with syncing the + * timer virtual interrupt state. + */ + kvm_timer_sync_hwstate(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -732,16 +751,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); - /* - * We must sync the PMU and timer state before the vgic state so - * that the vgic can properly sample the updated state of the - * interrupt line. - */ - kvm_pmu_sync_hwstate(vcpu); - kvm_timer_sync_hwstate(vcpu); - - kvm_vgic_sync_hwstate(vcpu); - preempt_enable(); ret = handle_exit(vcpu, run, ret); diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 4734915ab71f..f39861639f08 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -21,58 +21,48 @@ #include -/* vcpu is already in the HYP VA space */ -void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) +{ + u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low; + write_sysreg(cntvoff, cntvoff_el2); +} + +void __hyp_text enable_el1_phys_timer_access(void) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; - if (timer->enabled) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - } + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); +} - /* Disable the virtual timer */ - write_sysreg_el0(0, cntv_ctl); +void __hyp_text disable_el1_phys_timer_access(void) +{ + u64 val; + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); +} + +void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +{ /* * We don't need to do this for VHE since the host kernel runs in EL2 * with HCR_EL2.TGE ==1, which makes those bits have no impact. 
*/ - if (!has_vhe()) { - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - } - - /* Clear cntvoff for the host */ - write_sysreg(0, cntvoff_el2); + if (!has_vhe()) + enable_el1_phys_timer_access(); } -void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - u64 val; - - /* Those bits are already configured at boot on VHE-system */ - if (!has_vhe()) { - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - } - - if (timer->enabled) { - write_sysreg(vtimer->cntvoff, cntvoff_el2); - write_sysreg_el0(vtimer->cnt_cval, cntv_cval); - isb(); - write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); - } + if (!has_vhe()) + disable_el1_phys_timer_access(); } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index f51c1e1b3f70..40791c121710 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -278,6 +278,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); u8 prop; int ret; + unsigned long flags; ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, &prop, 1); @@ -285,15 +286,15 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, if (ret) return ret; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { irq->priority = LPI_PROP_PRIORITY(prop); irq->enabled = LPI_PROP_ENABLE_BIT(prop); - vgic_queue_irq_unlock(kvm, irq); + vgic_queue_irq_unlock(kvm, irq, flags); } else { - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } return 0; @@ -393,6 +394,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) int ret = 0; u32 *intids; int nr_irqs, i; + unsigned long flags; nr_irqs = vgic_copy_lpi_list(vcpu, &intids); if (nr_irqs < 0) @@ -420,9 +422,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -515,6 +517,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, { struct kvm_vcpu *vcpu; struct its_ite *ite; + unsigned long flags; if (!its->enabled) return -EBUSY; @@ -530,9 +533,9 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, if (!vcpu->arch.vgic_cpu.lpis_enabled) return -EBUSY; - spin_lock(&ite->irq->irq_lock); + spin_lock_irqsave(&ite->irq->irq_lock, flags); ite->irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, ite->irq); + vgic_queue_irq_unlock(kvm, ite->irq, flags); return 0; } @@ -894,7 +897,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, } /* Requires the its_lock to be held. 
*/ -static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) +static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) { struct its_ite *ite, *temp; @@ -910,6 +913,24 @@ static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) kfree(device); } +/* its lock must be held */ +static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_device *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->device_list, dev_list) + vgic_its_free_device(kvm, cur); +} + +/* its lock must be held */ +static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_collection *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list) + vgic_its_free_collection(its, cur->collection_id); +} + /* Must be called with its_lock mutex held */ static struct its_device *vgic_its_alloc_device(struct vgic_its *its, u32 device_id, gpa_t itt_addr, @@ -957,7 +978,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, * by removing the mapping and re-establishing it. */ if (device) - vgic_its_unmap_device(kvm, device); + vgic_its_free_device(kvm, device); /* * The spec does not say whether unmapping a not-mapped device @@ -1410,7 +1431,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, unsigned long val) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); - u64 entry_size, device_type; + u64 entry_size, table_type; u64 reg, *regptr, clearbits = 0; /* When GITS_CTLR.Enable is 1, we ignore write accesses. */ @@ -1421,12 +1442,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, case 0: regptr = &its->baser_device_table; entry_size = abi->dte_esz; - device_type = GITS_BASER_TYPE_DEVICE; + table_type = GITS_BASER_TYPE_DEVICE; break; case 1: regptr = &its->baser_coll_table; entry_size = abi->cte_esz; - device_type = GITS_BASER_TYPE_COLLECTION; + table_type = GITS_BASER_TYPE_COLLECTION; clearbits = GITS_BASER_INDIRECT; break; default: @@ -1438,10 +1459,24 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, reg &= ~clearbits; reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; - reg |= device_type << GITS_BASER_TYPE_SHIFT; + reg |= table_type << GITS_BASER_TYPE_SHIFT; reg = vgic_sanitise_its_baser(reg); *regptr = reg; + + if (!(reg & GITS_BASER_VALID)) { + /* Take the its_lock to prevent a race with a save/restore */ + mutex_lock(&its->its_lock); + switch (table_type) { + case GITS_BASER_TYPE_DEVICE: + vgic_its_free_device_list(kvm, its); + break; + case GITS_BASER_TYPE_COLLECTION: + vgic_its_free_collection_list(kvm, its); + break; + } + mutex_unlock(&its->its_lock); + } } static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, @@ -1612,46 +1647,17 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) return vgic_its_set_abi(its, NR_ITS_ABIS - 1); } -static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev) -{ - struct its_ite *ite, *tmp; - - list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list) - its_free_ite(kvm, ite); - list_del(&dev->dev_list); - kfree(dev); -} - static void vgic_its_destroy(struct kvm_device *kvm_dev) { struct kvm *kvm = kvm_dev->kvm; struct vgic_its *its = kvm_dev->private; - struct list_head *cur, *temp; - - /* - * We may end up here without the lists ever having been initialized. - * Check this and bail out early to avoid dereferencing a NULL pointer. 
- */ - if (!its->device_list.next) - return; mutex_lock(&its->its_lock); - list_for_each_safe(cur, temp, &its->device_list) { - struct its_device *dev; - dev = list_entry(cur, struct its_device, dev_list); - vgic_its_free_device(kvm, dev); - } + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); - list_for_each_safe(cur, temp, &its->collection_list) { - struct its_collection *coll; - - coll = list_entry(cur, struct its_collection, coll_list); - list_del(cur); - kfree(coll); - } mutex_unlock(&its->its_lock); - kfree(its); } @@ -2267,29 +2273,13 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) */ static int vgic_its_save_tables_v0(struct vgic_its *its) { - struct kvm *kvm = its->dev->kvm; int ret; - mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); - - if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - return -EBUSY; - } - ret = vgic_its_save_device_tables(its); if (ret) - goto out; + return ret; - ret = vgic_its_save_collection_table(its); - -out: - unlock_all_vcpus(kvm); - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - return ret; + return vgic_its_save_collection_table(its); } /** @@ -2299,29 +2289,13 @@ out: */ static int vgic_its_restore_tables_v0(struct vgic_its *its) { - struct kvm *kvm = its->dev->kvm; int ret; - mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); - - if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - return -EBUSY; - } - ret = vgic_its_restore_collection_table(its); if (ret) - goto out; + return ret; - ret = vgic_its_restore_device_tables(its); -out: - unlock_all_vcpus(kvm); - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - - return ret; + return vgic_its_restore_device_tables(its); } static int vgic_its_commit_v0(struct vgic_its *its) @@ -2340,6 +2314,19 @@ static int vgic_its_commit_v0(struct vgic_its *its) return 0; } +static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its) +{ + /* We need to keep the ABI specific field values */ + its->baser_coll_table &= ~GITS_BASER_VALID; + its->baser_device_table &= ~GITS_BASER_VALID; + its->cbaser = 0; + its->creadr = 0; + its->cwriter = 0; + its->enabled = 0; + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); +} + static int vgic_its_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -2354,6 +2341,8 @@ static int vgic_its_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return 0; + case KVM_DEV_ARM_ITS_CTRL_RESET: + return 0; case KVM_DEV_ARM_ITS_SAVE_TABLES: return 0; case KVM_DEV_ARM_ITS_RESTORE_TABLES: @@ -2366,6 +2355,41 @@ static int vgic_its_has_attr(struct kvm_device *dev, return -ENXIO; } +static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ret = 0; + + if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ + return 0; + + mutex_lock(&kvm->lock); + mutex_lock(&its->its_lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + switch (attr) { + case KVM_DEV_ARM_ITS_CTRL_RESET: + vgic_its_reset(kvm, its); + break; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + ret = abi->save_tables(its); + break; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + ret = abi->restore_tables(its); + break; + } + + unlock_all_vcpus(kvm); + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return ret; +} + static int 
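The new KVM_DEV_ARM_ITS_CTRL_RESET attribute is driven through the standard KVM device-attribute ioctl; as vgic_its_ctrl() above shows, the reset runs with kvm->lock, its_lock and all vcpu locks held. A minimal, hypothetical userspace sketch follows, assuming its_fd is the file descriptor returned by KVM_CREATE_DEVICE for a KVM_DEV_TYPE_ARM_VGIC_ITS device and eliding error handling.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int reset_its(int its_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_ITS_CTRL_RESET,
		/* .addr is not used by this attribute and stays zero */
	};

	return ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr);
}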
vgic_its_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -2391,19 +2415,8 @@ static int vgic_its_set_attr(struct kvm_device *dev, return vgic_register_its_iodev(dev->kvm, its, addr); } - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - const struct vgic_its_abi *abi = vgic_its_get_abi(its); - - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - /* Nothing to do */ - return 0; - case KVM_DEV_ARM_ITS_SAVE_TABLES: - return abi->save_tables(its); - case KVM_DEV_ARM_ITS_RESTORE_TABLES: - return abi->restore_tables(its); - } - } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + return vgic_its_ctrl(dev->kvm, its, attr->attr); case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { u64 __user *uaddr = (u64 __user *)(long)attr->addr; u64 reg; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index b3d4a10f09a1..e21e2f49b005 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -74,6 +74,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, int mode = (val >> 24) & 0x03; int c; struct kvm_vcpu *vcpu; + unsigned long flags; switch (mode) { case 0x0: /* as specified by targets */ @@ -97,11 +98,11 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; irq->source |= 1U << source_vcpu->vcpu_id; - vgic_queue_irq_unlock(source_vcpu->kvm, irq); + vgic_queue_irq_unlock(source_vcpu->kvm, irq, flags); vgic_put_irq(source_vcpu->kvm, irq); } } @@ -131,6 +132,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 8); u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0); int i; + unsigned long flags; /* GICD_ITARGETSR[0-7] are read-only */ if (intid < VGIC_NR_PRIVATE_IRQS) @@ -140,13 +142,13 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); int target; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->targets = (val >> (i * 8)) & cpu_mask; target = irq->targets ? 
__ffs(irq->targets) : 0; irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -174,17 +176,18 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, { u32 intid = addr & 0x0f; int i; + unsigned long flags; for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->source &= ~((val >> (i * 8)) & 0xff); if (!irq->source) irq->pending_latch = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -195,19 +198,20 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, { u32 intid = addr & 0x0f; int i; + unsigned long flags; for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->source |= (val >> (i * 8)) & 0xff; if (irq->source) { irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 408ef06638fc..83786108829e 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -129,6 +129,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, { int intid = VGIC_ADDR_TO_INTID(addr, 64); struct vgic_irq *irq; + unsigned long flags; /* The upper word is WI for us since we don't implement Aff3. */ if (addr & 4) @@ -139,13 +140,13 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, if (!irq) return; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* We only care about and preserve Aff0, Aff1 and Aff2. */ irq->mpidr = val & GENMASK(23, 0); irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -241,11 +242,12 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i, &val)) { /* * pending_latch is set irrespective of irq type @@ -253,10 +255,10 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, * restore irq config before pending info. 
*/ irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { irq->pending_latch = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); @@ -799,6 +801,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) int sgi, c; int vcpu_id = vcpu->vcpu_id; bool broadcast; + unsigned long flags; sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); @@ -837,10 +840,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index c1e4bdd66131..deb51ee16a3d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -69,13 +69,14 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -87,15 +88,16 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -126,14 +128,15 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -144,15 +147,16 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -181,7 +185,8 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { struct kvm_vcpu *requester_vcpu; - spin_lock(&irq->irq_lock); + unsigned long flags; + spin_lock_irqsave(&irq->irq_lock, flags); /* * The vcpu parameter here can mean multiple things depending on how @@ -216,9 +221,9 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->active = new_active_state; if (new_active_state) - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else - spin_unlock(&irq->irq_lock); + 
spin_unlock_irqrestore(&irq->irq_lock, flags); } /* @@ -352,14 +357,15 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 8); int i; + unsigned long flags; for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -390,6 +396,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 2); int i; + unsigned long flags; for (i = 0; i < len * 4; i++) { struct vgic_irq *irq; @@ -404,14 +411,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, continue; irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i * 2 + 1, &val)) irq->config = VGIC_CONFIG_EDGE; else irq->config = VGIC_CONFIG_LEVEL; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -443,6 +450,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, { int i; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + unsigned long flags; for (i = 0; i < 32; i++) { struct vgic_irq *irq; @@ -459,12 +467,12 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, * restore irq config before line level. */ new_level = !!(val & (1U << i)); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->line_level = new_level; if (new_level) - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index e4187e52bb26..80897102da26 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -62,6 +62,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; int lr; + unsigned long flags; cpuif->vgic_hcr &= ~GICH_HCR_UIE; @@ -77,7 +78,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* Always preserve the active bit */ irq->active = !!(val & GICH_LR_ACTIVE_BIT); @@ -104,7 +105,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 96ea597db0e7..863351c090d8 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -44,6 +44,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 model = vcpu->kvm->arch.vgic.vgic_model; int lr; + unsigned long flags; cpuif->vgic_hcr &= ~ICH_HCR_UIE; @@ -66,7 +67,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) if (!irq) /* An LPI could have been unmapped. 
*/ continue; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* Always preserve the active bit */ irq->active = !!(val & ICH_LR_ACTIVE_BIT); @@ -94,7 +95,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -278,6 +279,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) bool status; u8 val; int ret; + unsigned long flags; retry: vcpu = irq->target_vcpu; @@ -296,13 +298,13 @@ retry: status = val & (1 << bit_nr); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (irq->target_vcpu != vcpu) { - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); goto retry; } irq->pending_latch = status; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); if (status) { /* clear consumed data */ diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index fed717e07938..e54ef2fdf73d 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -53,6 +53,10 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * vcpuX->vcpu_id < vcpuY->vcpu_id: * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); + * + * Since the VGIC must support injecting virtual interrupts from ISRs, we have + * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer + * spinlocks for any lock that may be taken while injecting an interrupt. */ /* @@ -261,7 +265,8 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne * Needs to be entered with the IRQ lock already held, but will return * with all locks dropped. */ -bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq) +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) { struct kvm_vcpu *vcpu; @@ -279,7 +284,7 @@ retry: * not need to be inserted into an ap_list and there is also * no more work for us to do. */ - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); /* * We have to kick the VCPU here, because we could be @@ -301,11 +306,11 @@ retry: * We must unlock the irq lock to take the ap_list_lock where * we are going to insert this new pending interrupt. 
*/ - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); /* someone can do stuff here, which we re-check below */ - spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); spin_lock(&irq->irq_lock); /* @@ -322,9 +327,9 @@ retry: if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { spin_unlock(&irq->irq_lock); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); goto retry; } @@ -337,7 +342,7 @@ retry: irq->vcpu = vcpu; spin_unlock(&irq->irq_lock); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); @@ -367,6 +372,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, { struct kvm_vcpu *vcpu; struct vgic_irq *irq; + unsigned long flags; int ret; trace_vgic_update_irq_pending(cpuid, intid, level); @@ -383,11 +389,11 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, if (!irq) return -EINVAL; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(kvm, irq); return 0; } @@ -397,7 +403,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, else irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, irq); + vgic_queue_irq_unlock(kvm, irq, flags); vgic_put_irq(kvm, irq); return 0; @@ -406,15 +412,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + unsigned long flags; BUG_ON(!irq); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->hw = true; irq->hwintid = phys_irq; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return 0; @@ -423,6 +430,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) { struct vgic_irq *irq; + unsigned long flags; if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; @@ -430,12 +438,12 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); BUG_ON(!irq); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->hw = false; irq->hwintid = 0; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return 0; @@ -486,9 +494,10 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; + unsigned long flags; retry: - spin_lock(&vgic_cpu->ap_list_lock); + spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; @@ -528,7 +537,7 @@ retry: /* This interrupt looks like it has to be migrated. 
*/ spin_unlock(&irq->irq_lock); - spin_unlock(&vgic_cpu->ap_list_lock); + spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); /* * Ensure locking order by always locking the smallest @@ -542,7 +551,7 @@ retry: vcpuB = vcpu; } - spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); + spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, SINGLE_DEPTH_NESTING); spin_lock(&irq->irq_lock); @@ -566,11 +575,11 @@ retry: spin_unlock(&irq->irq_lock); spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); - spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); + spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); goto retry; } - spin_unlock(&vgic_cpu->ap_list_lock); + spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); } static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) @@ -703,6 +712,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); @@ -735,11 +746,12 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; bool pending = false; + unsigned long flags; if (!vcpu->kvm->arch.vgic.enabled) return false; - spin_lock(&vgic_cpu->ap_list_lock); + spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); @@ -750,7 +762,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) break; } - spin_unlock(&vgic_cpu->ap_list_lock); + spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); return pending; } @@ -776,10 +788,14 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); bool map_is_active; + unsigned long flags; - spin_lock(&irq->irq_lock); + if (!vgic_initialized(vcpu->kvm)) + return false; + + spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return map_is_active; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index bf9ceab67c77..4f8aecb07ae6 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -140,7 +140,8 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); -bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); void vgic_kick_vcpus(struct kvm *kvm); int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
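The locking comment added to vgic.c above is the reason the outer vgic locks switch to the irqsave variants throughout this patch: a virtual interrupt may now be raised directly from a host hardirq handler. A hypothetical sketch of such a caller is below; the handler, MY_GUEST_SPI and the device wiring are invented for illustration, and only kvm_vgic_inject_irq() comes from the code shown here.

#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>

#define MY_GUEST_SPI	40	/* made-up guest SPI number */

static irqreturn_t my_forwarded_irq_handler(int irq, void *data)
{
	struct kvm *kvm = data;

	/*
	 * Runs in hardirq context with interrupts disabled.  The call
	 * ends up in vgic_queue_irq_unlock(), which is why every lock
	 * on that path now uses spin_lock_irqsave().
	 */
	kvm_vgic_inject_irq(kvm, 0, MY_GUEST_SPI, true, NULL);

	return IRQ_HANDLED;
}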