diff --git a/Makefile b/Makefile index 994b315..3c35ae6 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 154 -EXTRAVERSION = -3.13 +SUBLEVEL = 170 +EXTRAVERSION = -3.18 NAME = Kleptomaniac Octopus # *DOCUMENTATION* diff --git a/arch/e2k/include/asm-l/apic.h b/arch/e2k/include/asm-l/apic.h index e266988..96c986a 100644 --- a/arch/e2k/include/asm-l/apic.h +++ b/arch/e2k/include/asm-l/apic.h @@ -739,6 +739,8 @@ default_check_phys_apicid_present(int phys_apicid) { return __default_check_phys_apicid_present(phys_apicid); } + +extern bool default_check_phys_apicid_online(void); #else extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); diff --git a/arch/e2k/include/asm-l/l_pmc.h b/arch/e2k/include/asm-l/l_pmc.h index f9006a9..86f8685 100644 --- a/arch/e2k/include/asm-l/l_pmc.h +++ b/arch/e2k/include/asm-l/l_pmc.h @@ -134,7 +134,9 @@ extern unsigned int load_threshold; int spmc_get_temp_cur0(void) { return SPMC_TEMP_BAD_VALUE; } #endif /* CONFIG_L_PMC || CONFIG_S2_PMC */ - +#if defined(CONFIG_PMC_R2KP) +uint32_t r2kp_get_freq_mult(int cpu); +#endif #endif /* __L_ASM_PMC_H__ */ diff --git a/arch/e2k/include/asm-l/mpspec.h b/arch/e2k/include/asm-l/mpspec.h index c5f810c..45eb647 100644 --- a/arch/e2k/include/asm-l/mpspec.h +++ b/arch/e2k/include/asm-l/mpspec.h @@ -630,6 +630,7 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) #define PHYSID_MASK_NONE { {[0 ... 
PHYSID_ARRAY_SIZE-1] = 0UL} } extern physid_mask_t phys_cpu_present_map; +extern physid_mask_t phys_cpu_offline_map; #endif /* __ASSEMBLY__ */ diff --git a/arch/e2k/include/asm-l/serial.h b/arch/e2k/include/asm-l/serial.h index c5f7f33..aed17ac 100644 --- a/arch/e2k/include/asm-l/serial.h +++ b/arch/e2k/include/asm-l/serial.h @@ -23,15 +23,6 @@ #define STD_COM4_FLAGS UPF_BOOT_AUTOCONF #endif -#ifdef CONFIG_E2K -#define SERIAL_PORT_DFNS \ - /* UART CLK PORT IRQ FLAGS */ \ - { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ - { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ - { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ - { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ -#endif - #define AM85C30_RES_Tx_P 0x28 #define AM85C30_EXT_INT_ENAB 0x01 #define AM85C30_TxINT_ENAB 0x02 diff --git a/arch/e2k/include/asm/copy-hw-stacks.h b/arch/e2k/include/asm/copy-hw-stacks.h index 5247cc0..ffdc936 100644 --- a/arch/e2k/include/asm/copy-hw-stacks.h +++ b/arch/e2k/include/asm/copy-hw-stacks.h @@ -244,12 +244,12 @@ kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size) native_kernel_hw_stack_frames_copy(dst, src, size); } static __always_inline void -collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size) +collapse_kernel_pcs(pt_regs_t *regs, u64 *dst, const u64 *src, u64 spilled_size) { native_collapse_kernel_pcs(dst, src, spilled_size); } static __always_inline void -collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size) +collapse_kernel_ps(pt_regs_t *regs, u64 *dst, const u64 *src, u64 spilled_size) { native_collapse_kernel_ps(dst, src, spilled_size); } @@ -601,7 +601,8 @@ native_user_hw_stacks_copy(struct e2k_stacks *stacks, return 0; } -static inline void collapse_kernel_hw_stacks(struct e2k_stacks *stacks) +static inline void collapse_kernel_hw_stacks(pt_regs_t *regs, + struct e2k_stacks *stacks) { e2k_pcsp_lo_t k_pcsp_lo = current_thread_info()->k_pcsp_lo; e2k_psp_lo_t k_psp_lo = 
current_thread_info()->k_psp_lo; @@ -635,7 +636,7 @@ static inline void collapse_kernel_hw_stacks(struct e2k_stacks *stacks) if (spilled_pc_size) { dst = (u64 *) AS(k_pcsp_lo).base; src = (u64 *) (AS(k_pcsp_lo).base + spilled_pc_size); - collapse_kernel_pcs(dst, src, spilled_pc_size); + collapse_kernel_pcs(regs, dst, src, spilled_pc_size); stacks->pcshtp = SZ_OF_CR; @@ -645,7 +646,7 @@ static inline void collapse_kernel_hw_stacks(struct e2k_stacks *stacks) if (spilled_p_size) { dst = (u64 *) AS(k_psp_lo).base; src = (u64 *) (AS(k_psp_lo).base + spilled_p_size); - collapse_kernel_ps(dst, src, spilled_p_size); + collapse_kernel_ps(regs, dst, src, spilled_p_size); AS(pshtp).ind = 0; stacks->pshtp = pshtp; @@ -823,7 +824,7 @@ static inline int do_user_hw_stacks_copy_full(struct e2k_stacks *stacks, * this way we can later FILL using return trick (otherwise there * would be no space in chain stack for the trick). */ - collapse_kernel_hw_stacks(stacks); + collapse_kernel_hw_stacks(regs, stacks); /* * Copy saved %cr registers diff --git a/arch/e2k/include/asm/cpu_features.h b/arch/e2k/include/asm/cpu_features.h index e8a53d7..91940b6 100644 --- a/arch/e2k/include/asm/cpu_features.h +++ b/arch/e2k/include/asm/cpu_features.h @@ -34,6 +34,8 @@ enum { CPU_NO_HWBUG_SOFT_WAIT, CPU_HWBUG_SOFT_WAIT_E8C2, CPU_HWBUG_C3, + CPU_HWBUG_HRET_INTC_CU, + CPU_HWBUG_INTC_CR_WRITE, /* Features, not bugs */ CPU_FEAT_EPIC, diff --git a/arch/e2k/include/asm/e2k-iommu.h b/arch/e2k/include/asm/e2k-iommu.h index 5d619ec..02c5ab1 100644 --- a/arch/e2k/include/asm/e2k-iommu.h +++ b/arch/e2k/include/asm/e2k-iommu.h @@ -1,6 +1,11 @@ #ifndef __ASM_E2K_IOMMU_H #define __ASM_E2K_IOMMU_H +#ifdef CONFIG_EPIC +extern void e2k_iommu_error_interrupt(void); +#else +static inline void e2k_iommu_error_interrupt(void) {} +#endif extern int iommu_panic_off; extern void e2k_iommu_error_interrupt(void); diff --git a/arch/e2k/include/asm/e2k_api.h b/arch/e2k/include/asm/e2k_api.h index 9ae2795..45966f9 100644 --- 
a/arch/e2k/include/asm/e2k_api.h +++ b/arch/e2k/include/asm/e2k_api.h @@ -924,7 +924,6 @@ _Pragma("no_asm_inline") \ NATIVE_SET_DSREG_CLOSED_NOEXC(reg, (val), 7) #endif - /* * bug #97048 * Closed GNU asm is used for rarely read registers. @@ -1007,6 +1006,24 @@ _Pragma("no_asm_inline") \ : "ri" ((__e2k_u64_t) (val))); \ }) +/* Add ctpr3 clobber to avoid writing CRs between return and ct */ +#define NATIVE_SET_CR_CLOSED_NOEXC(reg_mnemonic, val) \ +({ \ + asm volatile ( \ + ALTERNATIVE_1_ALTINSTR \ + /* CPU_HWBUG_INTC_CR_WRITE version */ \ + "{wait ma_c=1\n" \ + "rwd %0, %%" #reg_mnemonic "}" \ + ALTERNATIVE_2_OLDINSTR \ + /* Default version */ \ + "{rwd %0, %%" #reg_mnemonic "}" \ + ALTERNATIVE_3_FEATURE(%[facility]) \ + : \ + : "ri" ((__e2k_u64_t) (val)), \ + [facility] "i" (CPU_HWBUG_INTC_CR_WRITE) \ + : "ctpr3"); \ +}) + #define NATIVE_SET_DSREGS_CLOSED_NOEXC(reg_mnemonic_lo, reg_mnemonic_hi, \ _val_lo, _val_hi, nop) \ ({ \ @@ -6689,16 +6706,38 @@ do { \ /* Clobbers "ctpr" are here to tell lcc that there is a return inside */ #define E2K_HRET_CLOBBERS "ctpr1", "ctpr2", "ctpr3" +#define E2K_HRET_READ_INTC_PTR_CU \ + ".word 0x04100011\n" /* rrd,0 %intc_ptr_cu, %dr0 */ \ + ".word 0x3f65c080\n" \ + ".word 0x01c00000\n" \ + ".word 0x00000000\n" + +#define E2K_HRET_CLEAR_INTC_INFO_CU \ + ".word 0x04100291\n" /* nop 5 */ \ + ".word 0x3dc0c064\n" /* rwd,0 0x0, %intc_info_cu */ \ + ".word 0x01c00000\n" \ + ".word 0x00000000\n" + #define E2K_HRET(_ret) \ do { \ asm volatile ( \ + ALTERNATIVE_1_ALTINSTR \ + /* CPU_HWBUG_HRET_INTC_CU version */ \ + E2K_HRET_READ_INTC_PTR_CU \ + E2K_HRET_CLEAR_INTC_INFO_CU \ + E2K_HRET_CLEAR_INTC_INFO_CU \ + E2K_HRET_READ_INTC_PTR_CU \ + ALTERNATIVE_2_OLDINSTR \ + /* Default version */ \ + ALTERNATIVE_3_FEATURE(%[facility]) \ "addd 0x0, %[ret], %%r0\n" \ "{.word 0x00005012\n" /* HRET */ \ " .word 0xc0000020\n" \ " .word 0x30000003\n" \ " .word 0x00000000}\n" \ : \ - : [ret] "ir" (_ret) \ + : [facility] "i" (CPU_HWBUG_HRET_INTC_CU), \ + [ret] 
"ir" (_ret) \ : E2K_HRET_CLOBBERS); \ unreachable(); \ } while (0) diff --git a/arch/e2k/include/asm/e2k_debug.h b/arch/e2k/include/asm/e2k_debug.h index 074cd8c..a98067d 100644 --- a/arch/e2k/include/asm/e2k_debug.h +++ b/arch/e2k/include/asm/e2k_debug.h @@ -32,7 +32,7 @@ }) extern void print_stack_frames(struct task_struct *task, - struct pt_regs *pt_regs, int show_reg_window) __cold; + const struct pt_regs *pt_regs, int show_reg_window) __cold; extern void print_mmap(struct task_struct *task) __cold; extern void print_va_tlb(e2k_addr_t addr, int large_page) __cold; extern void print_all_TC(const trap_cellar_t *TC, int TC_count) __cold; @@ -862,6 +862,18 @@ do { \ current->comm, current->pid, ##__VA_ARGS__); \ } while (0) +extern void __debug_signal_print(const char *message, + struct pt_regs *regs, bool print_stack) __cold; + +static inline void debug_signal_print(const char *message, + struct pt_regs *regs, bool print_stack) +{ + if (likely(!debug_signal)) + return; + + __debug_signal_print(message, regs, print_stack); +} + extern int debug_trap; #endif /* !(__ASSEMBLY__) */ diff --git a/arch/e2k/include/asm/kdebug.h b/arch/e2k/include/asm/kdebug.h index b90ddc4..1178c31 100644 --- a/arch/e2k/include/asm/kdebug.h +++ b/arch/e2k/include/asm/kdebug.h @@ -11,11 +11,6 @@ enum die_val { DIE_BREAKPOINT }; -extern void printk_address(unsigned long address, int reliable) __cold; -extern void show_trace(struct task_struct *t, struct pt_regs *regs, - unsigned long *sp, unsigned long bp) __cold; -extern void __show_regs(struct pt_regs *regs, int all) __cold; -extern void show_regs(struct pt_regs *regs) __cold; extern void die(const char *str, struct pt_regs *regs, long err) __cold; #endif /* _ASM_E2K_KDEBUG_H */ diff --git a/arch/e2k/include/asm/kvm/cpu_hv_regs_access.h b/arch/e2k/include/asm/kvm/cpu_hv_regs_access.h index 175592c..2b490cd 100644 --- a/arch/e2k/include/asm/kvm/cpu_hv_regs_access.h +++ b/arch/e2k/include/asm/kvm/cpu_hv_regs_access.h @@ -201,15 +201,24 @@ 
static inline void write_SH_CORE_MODE_reg(e2k_core_mode_t core_mode) #endif /* CONFIG_VIRTUALIZATION */ #define READ_G_PREEMPT_TMR_REG() \ - ((e2k_g_preempt_tmr_t) NATIVE_GET_SREG_CLOSED(g_preempt_tmr)) + ((g_preempt_tmr_t) NATIVE_GET_DSREG_CLOSED(g_preempt_tmr)) #define WRITE_G_PREEMPT_TMR_REG(x) \ - NATIVE_SET_SREG_CLOSED_NOEXC(g_preempt_tmr, AW(x), 5) + NATIVE_SET_DSREG_CLOSED_NOEXC(g_preempt_tmr, AW(x), 5) #define READ_INTC_PTR_CU() NATIVE_GET_DSREG_CLOSED(intc_ptr_cu) #define READ_INTC_INFO_CU() NATIVE_GET_DSREG_CLOSED(intc_info_cu) #define WRITE_INTC_INFO_CU(x) \ NATIVE_SET_DSREG_CLOSED_NOEXC(intc_info_cu, x, 5) +/* Clear INTC_INFO_CU header and INTC_PTR_CU */ +static inline void clear_intc_info_cu(void) +{ + READ_INTC_PTR_CU(); + WRITE_INTC_INFO_CU(0ULL); + WRITE_INTC_INFO_CU(0ULL); + READ_INTC_PTR_CU(); +} + static inline void save_intc_info_cu(intc_info_cu_t *info, int *num) { u64 info_ptr, i = 0; @@ -227,14 +236,8 @@ static inline void save_intc_info_cu(intc_info_cu_t *info, int *num) return; } - /* - * CU header should be cleared --- fg@mcst.ru - */ AW(info->header.lo) = READ_INTC_INFO_CU(); AW(info->header.hi) = READ_INTC_INFO_CU(); - READ_INTC_PTR_CU(); - WRITE_INTC_INFO_CU(0ULL); - WRITE_INTC_INFO_CU(0ULL); info_ptr -= 2; /* @@ -254,17 +257,16 @@ static inline void restore_intc_info_cu(const intc_info_cu_t *info, int num) { int i; - /* - * 1) Clear the hardware pointer - */ + /* Clear the pointer, in case we just migrated to new cpu */ READ_INTC_PTR_CU(); - if (num == -1) + + /* Header will be cleared by hardware during GLAUNCH */ + if (num == -1 || num == 0) return; /* - * 2) Write the registers - * - * CU header should be cleared --- fg@mcst.ru + * Restore intercepted events. Header flags aren't used for reexecution, + * so restore 0 in header. 
*/ WRITE_INTC_INFO_CU(0ULL); WRITE_INTC_INFO_CU(0ULL); diff --git a/arch/e2k/include/asm/kvm/ctx_signal_stacks.h b/arch/e2k/include/asm/kvm/ctx_signal_stacks.h new file mode 100644 index 0000000..f19e3c8 --- /dev/null +++ b/arch/e2k/include/asm/kvm/ctx_signal_stacks.h @@ -0,0 +1,33 @@ +/* + * arch/e2k/include/asm/kvm/ctx_signal_stacks.h + * + * This file contains interfaces for managing of separate signal stacks + * for guest's contexts + * + * Copyright 2022 Andrey Alekhin (Andrey.I.Alekhin@mcst.ru) + */ + +#ifndef CTX_SIGNAL_STACKS +#define CTX_SIGNAL_STACKS + +#include + +#include + +enum { + CTX_STACK_READY = 0U, /* Stack is free to take */ + CTX_STACK_BUSY = 1U, /* Stack is currently busy by thread */ + CTX_STACK_COPYING = 2U /* Stack is being copied in fork() */ +}; + +struct rhashtable *alloc_gst_ctx_sig_stacks_ht(void); +void free_gst_ctx_sig_stacks_ht(struct rhashtable *ht); +struct rhashtable *copy_gst_ctx_sig_stacks_ht(void); +int add_gst_ctx_signal_stack(struct rhashtable *ht, + struct signal_stack *signal_stack, + u64 key, int state); +void remove_gst_ctx_signal_stack(u64 key); +int switch_gst_ctx_signal_stack(u64 to_key); +int update_curr_gst_signal_stack(void); + +#endif /* CTX_SIGNAL_STACKS */ diff --git a/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h b/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h index 94ece6b..d74c53c 100644 --- a/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h +++ b/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h @@ -34,7 +34,7 @@ kvm_kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size) } static __always_inline void -kvm_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size) +kvm_collapse_kernel_ps(pt_regs_t *regs, u64 *dst, const u64 *src, u64 spilled_size) { e2k_psp_hi_t k_psp_hi; u64 ps_ind, ps_size; @@ -55,6 +55,8 @@ kvm_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size) k_psp_hi = NATIVE_NV_READ_PSP_HI_REG(); k_psp_hi.PSP_hi_ind = size; 
HYPERVISOR_update_psp_hi(k_psp_hi.PSP_hi_half); + BUG_ON(regs->copyed.ps_size < spilled_size); + regs->copyed.ps_size -= spilled_size; DebugUST("move spilled procedure part from host top %px to " "bottom %px, size 0x%llx\n", @@ -65,7 +67,7 @@ kvm_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size) } static __always_inline void -kvm_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size) +kvm_collapse_kernel_pcs(pt_regs_t *regs, u64 *dst, const u64 *src, u64 spilled_size) { e2k_pcsp_hi_t k_pcsp_hi; u64 pcs_ind, pcs_size; @@ -86,6 +88,8 @@ kvm_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size) k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG(); k_pcsp_hi.PCSP_hi_ind = size; HYPERVISOR_update_pcsp_hi(k_pcsp_hi.PCSP_hi_half); + BUG_ON(regs->copyed.pcs_size < spilled_size); + regs->copyed.pcs_size -= spilled_size; DebugUST("move spilled chain part from host top %px to " "bottom %px, size 0x%llx\n", @@ -504,7 +508,8 @@ kvm_copy_injected_pcs_frames_to_user(pt_regs_t *regs, int frames_num) ATOMIC_GET_HW_PCS_SIZES_BASE_TOP(pcs_ind, pcs_size, pcs_base, pcsh_top); /* guest user stacks part spilled to kernel should be already copyed */ - BUG_ON(PCSHTP_SIGN_EXTEND(regs->copyed.pcs_size != stacks->pcshtp)); + BUG_ON(PCSHTP_SIGN_EXTEND(regs->copyed.pcs_size != stacks->pcshtp && + stacks->pcshtp != SZ_OF_CR)); src = (void *)(pcs_base + regs->copyed.pcs_size); DebugUST("chain stack at kernel from %px, size 0x%lx + 0x%lx, " @@ -647,15 +652,15 @@ kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size) } static __always_inline void -collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size) +collapse_kernel_ps(pt_regs_t *regs, u64 *dst, const u64 *src, u64 spilled_size) { - kvm_collapse_kernel_ps(dst, src, spilled_size); + kvm_collapse_kernel_ps(regs, dst, src, spilled_size); } static __always_inline void -collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size) +collapse_kernel_pcs(pt_regs_t *regs, u64 *dst, const u64 *src, u64 
spilled_size) { - kvm_collapse_kernel_pcs(dst, src, spilled_size); + kvm_collapse_kernel_pcs(regs, dst, src, spilled_size); } static __always_inline int diff --git a/arch/e2k/include/asm/kvm/guest/proc_context_stacks.h b/arch/e2k/include/asm/kvm/guest/proc_context_stacks.h new file mode 100644 index 0000000..ef7f5f1 --- /dev/null +++ b/arch/e2k/include/asm/kvm/guest/proc_context_stacks.h @@ -0,0 +1,78 @@ +#ifndef KVM_GUEST_PROC_CTXT_STACKS +#define KVM_GUEST_PROC_CTXT_STACKS + +#include + +#include +#include +#include +#include + +static inline int +kvm_mkctxt_prepare_hw_user_stacks(void (*user_func)(void), void *args, + u64 args_size, size_t d_stack_sz, + bool protected, void *ps_frames, + e2k_mem_crs_t *cs_frames) +{ + unsigned long ps_frames_k, cs_frames_k; + struct page *pg_ps_frames, *pg_cs_frames; + int ret; + + /* Get kernel address for procedure stack */ + pg_ps_frames = get_user_addr_to_kernel_page((unsigned long)ps_frames); + if (IS_ERR_OR_NULL(pg_ps_frames)) + ret = (IS_ERR(pg_ps_frames)) ? PTR_ERR(pg_ps_frames) : -EINVAL; + else + ps_frames_k = ((unsigned long)page_address(pg_ps_frames)) + + (((unsigned long)ps_frames) & ~PAGE_MASK); + + /* Get kernel address for chain stack */ + pg_cs_frames = get_user_addr_to_kernel_page((unsigned long)cs_frames); + if (IS_ERR_OR_NULL(pg_cs_frames)) + ret |= (IS_ERR(pg_cs_frames)) ? 
PTR_ERR(pg_cs_frames) : -EINVAL; + else + cs_frames_k = ((unsigned long)page_address(pg_cs_frames)) + + (((unsigned long)cs_frames) & ~PAGE_MASK); + + if (ret) + return ret; + + kvm_proc_ctxt_hw_stacks_t hw_stacks = { + .user_func = user_func, + .args = args, + .args_size = args_size, + .d_stack_sz = d_stack_sz, + .protected = protected, + .gst_mkctxt_trampoline = (u64)&kvm_guest_mkctxt_trampoline, + .ps_frames = (void *)ps_frames_k, + .cs_frames = (e2k_mem_crs_t *)cs_frames_k + }; + + ret = HYPERVISOR_prepare_mkctxt_hw_user_stacks(&hw_stacks); + + put_user_addr_to_kernel_page(pg_ps_frames); + put_user_addr_to_kernel_page(pg_cs_frames); + + return ret; +} + +static inline int +mkctxt_prepare_hw_user_stacks(void (*user_func)(void), void *args, + u64 args_size, size_t d_stack_sz, + bool protected, void *ps_frames, + e2k_mem_crs_t *cs_frames) +{ + if (IS_HV_GM()) { + return native_mkctxt_prepare_hw_user_stacks(user_func, args, + args_size, d_stack_sz, + protected, ps_frames, + cs_frames); + } else { + return kvm_mkctxt_prepare_hw_user_stacks(user_func, args, + args_size, d_stack_sz, + protected, ps_frames, + cs_frames); + } +} + +#endif /* KVM_GUEST_PROC_CTXT_STACKS */ diff --git a/arch/e2k/include/asm/kvm/guest/regs_state.h b/arch/e2k/include/asm/kvm/guest/regs_state.h index 22b1ebb..1f1e090 100644 --- a/arch/e2k/include/asm/kvm/guest/regs_state.h +++ b/arch/e2k/include/asm/kvm/guest/regs_state.h @@ -333,6 +333,12 @@ do { \ #define RESTORE_COMMON_REGS(regs) \ KVM_RESTORE_COMMON_REGS(regs) +#define CLEAR_DAM \ +({ \ + if (IS_HV_GM()) \ + NATIVE_CLEAR_DAM; \ +}) + static inline void save_glob_regs_v3(global_regs_t *gregs) { diff --git a/arch/e2k/include/asm/kvm/guest/signal.h b/arch/e2k/include/asm/kvm/guest/signal.h index f25133a..2ab6c48 100644 --- a/arch/e2k/include/asm/kvm/guest/signal.h +++ b/arch/e2k/include/asm/kvm/guest/signal.h @@ -8,7 +8,12 @@ extern int kvm_signal_setup(struct pt_regs *regs); extern int kvm_longjmp_copy_user_to_kernel_hw_stacks(struct 
pt_regs *regs, struct pt_regs *new_regs); -extern int kvm_complete_long_jump(struct pt_regs *regs); +extern int kvm_complete_long_jump(struct pt_regs *regs, bool switch_stack, + u64 to_key); +extern void kvm_update_kernel_crs(e2k_mem_crs_t *crs, e2k_mem_crs_t *prev_crs, + e2k_mem_crs_t *p_prev_crs); +extern int kvm_add_ctx_signal_stack(u64 key, bool is_main); +extern void kvm_remove_ctx_signal_stack(u64 key); #ifdef CONFIG_KVM_GUEST_KERNEL /* it is native paravirtualized guest kernel */ @@ -25,13 +30,38 @@ static inline int longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, return kvm_longjmp_copy_user_to_kernel_hw_stacks(regs, new_regs); } -static inline int complete_long_jump(struct pt_regs *regs) +static inline int complete_long_jump(struct pt_regs *regs, bool switch_stack, + u64 to_key) { - if (likely(IS_HV_GM())) { - return native_complete_long_jump(regs); - } else { - return kvm_complete_long_jump(regs); - } + if (likely(IS_HV_GM())) + return native_complete_long_jump(); + else + return kvm_complete_long_jump(regs, switch_stack, to_key); +} + +static inline void update_kernel_crs(e2k_mem_crs_t *k_crs, e2k_mem_crs_t *crs, + e2k_mem_crs_t *prev_crs, e2k_mem_crs_t *p_prev_crs) +{ + if (likely(IS_HV_GM())) + native_update_kernel_crs(k_crs, crs, prev_crs, p_prev_crs); + else + kvm_update_kernel_crs(crs, prev_crs, p_prev_crs); +} + +static inline int add_ctx_signal_stack(u64 key, bool is_main) +{ + if (likely(IS_HV_GM())) + return native_add_ctx_signal_stack(key, is_main); + else + return kvm_add_ctx_signal_stack(key, is_main); +} + +static inline void remove_ctx_signal_stack(u64 key) +{ + if (likely(IS_HV_GM())) + native_remove_ctx_signal_stack(key); + else + kvm_remove_ctx_signal_stack(key); } #endif /* CONFIG_KVM_GUEST_KERNEL */ diff --git a/arch/e2k/include/asm/kvm/guest/trap_table.h b/arch/e2k/include/asm/kvm/guest/trap_table.h index 10a1979..fa6cd27 100644 --- a/arch/e2k/include/asm/kvm/guest/trap_table.h +++ 
b/arch/e2k/include/asm/kvm/guest/trap_table.h @@ -18,6 +18,8 @@ extern long kvm_guest_ttable_entry5(int sys_num, extern long kvm_guest_ttable_entry6(int sys_num, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6); +extern void kvm_guest_mkctxt_trampoline(void); + static __always_inline void kvm_init_pt_regs_copyed_fields(struct pt_regs *regs) { #ifdef CONFIG_KVM_GUEST_KERNEL @@ -205,6 +207,12 @@ is_guest_TIRs_frozen(struct pt_regs *regs) return false; /* none any guest */ } +static inline bool is_injected_guest_coredump(struct pt_regs *regs) +{ + /* nested guests is not supported */ + return false; +} + static inline void clear_fork_child_pt_regs(struct pt_regs *childregs) { kvm_clear_fork_child_pt_regs(childregs); diff --git a/arch/e2k/include/asm/kvm/hypercall.h b/arch/e2k/include/asm/kvm/hypercall.h index 0cb4f1b..a99c964 100644 --- a/arch/e2k/include/asm/kvm/hypercall.h +++ b/arch/e2k/include/asm/kvm/hypercall.h @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef CONFIG_KVM_GUEST_HW_HCALL extern unsigned long light_hw_hypercall(unsigned long nr, @@ -254,6 +255,8 @@ static inline unsigned long generic_hypercall6(unsigned long nr, #define KVM_HCALL_FAST_KERNEL_TAGGED_MEMORY_COPY 40 /* fast guest kernel tagged memory set */ #define KVM_HCALL_FAST_KERNEL_TAGGED_MEMORY_SET 41 +/* update last 2 frmaes on guest kernel stack */ +#define KVM_HCALL_UPDATE_GUEST_KERNEL_CRS 42 typedef struct kvm_hw_stacks_flush { @@ -404,7 +407,6 @@ HYPERVISOR_inject_interrupt(void) { return light_hypercall0(KVM_HCALL_INJECT_INTERRUPT); } -extern unsigned long kvm_hypervisor_inject_interrupt(void); static inline unsigned long HYPERVISOR_virqs_handled(void) { @@ -509,6 +511,14 @@ HYPERVISOR_fast_kernel_tagged_memory_set(void *addr, u64 val, u64 tag, size_t le return light_hypercall5(KVM_HCALL_FAST_KERNEL_TAGGED_MEMORY_SET, (unsigned long)addr, val, tag, len, strd_opcode); } +static inline unsigned long +HYPERVISOR_update_guest_kernel_crs(e2k_mem_crs_t *crs, e2k_mem_crs_t 
*prev_crs, + e2k_mem_crs_t *p_prev_crs) +{ + return light_hypercall3(KVM_HCALL_UPDATE_GUEST_KERNEL_CRS, + (unsigned long)crs, (unsigned long)prev_crs, + (unsigned long)p_prev_crs); +} /* * KVM hypervisor (host) <-> guest generic hypercalls list @@ -689,6 +699,15 @@ HYPERVISOR_fast_kernel_tagged_memory_set(void *addr, u64 val, u64 tag, size_t le /* recovery faulted load */ /* value and tag to global */ /* register */ +#define KVM_HCALL_PREPARE_MKCTXT_HW_USER_STACKS 145 + +#define KVM_HCALL_ADD_CTX_SIGNAL_STACK 146 + /* create separate */ + /* signal stack for context */ + /* on host side */ +#define KVM_HCALL_REMOVE_CTX_SIGNAL_STACK 147 + /* remove signal stack for */ + /* context on host side */ /* @@ -904,10 +923,13 @@ HYPERVISOR_set_clockevent(unsigned long delta) } static inline unsigned long -HYPERVISOR_complete_long_jump(kvm_long_jump_info_t *regs_state) +HYPERVISOR_complete_long_jump(kvm_long_jump_info_t *regs_state, + bool switch_stack, u64 to_key) { - return generic_hypercall1(KVM_HCALL_COMPLETE_LONG_JUMP, - (unsigned long)regs_state); + return generic_hypercall3(KVM_HCALL_COMPLETE_LONG_JUMP, + (unsigned long)regs_state, + (unsigned long)switch_stack, + (unsigned long)to_key); } static inline unsigned long @@ -1566,6 +1588,13 @@ static inline int HYPERVISOR_pv_enable_async_pf(u64 apf_reason_gpa, apf_ready_vector, irq_controller); } #endif /* CONFIG_KVM_ASYNC_PF */ +static inline int +HYPERVISOR_prepare_mkctxt_hw_user_stacks(kvm_proc_ctxt_hw_stacks_t *hw_stacks) +{ + return generic_hypercall1(KVM_HCALL_PREPARE_MKCTXT_HW_USER_STACKS, + (unsigned long)hw_stacks); +} + /* * The structure to flush guest virtual space at the host shadow PTs @@ -1622,4 +1651,17 @@ HYPERVISOR_wait_for_virq(int virq, bool in_progress) return generic_hypercall2(KVM_HCALL_WAIT_FOR_VIRQ, virq, in_progress); } +static inline unsigned long +HYPERVISOR_add_ctx_signal_stack(u64 key, bool is_main) +{ + return generic_hypercall2(KVM_HCALL_ADD_CTX_SIGNAL_STACK, + key, is_main); +} + +static 
inline void +HYPERVISOR_remove_ctx_signal_stack(u64 key) +{ + generic_hypercall1(KVM_HCALL_REMOVE_CTX_SIGNAL_STACK, key); +} + #endif /* _ASM_E2K_HYPERCALL_H */ diff --git a/arch/e2k/include/asm/kvm/hypervisor.h b/arch/e2k/include/asm/kvm/hypervisor.h index 58e0ba2..0c94000 100644 --- a/arch/e2k/include/asm/kvm/hypervisor.h +++ b/arch/e2k/include/asm/kvm/hypervisor.h @@ -102,5 +102,15 @@ static inline bool kvm_test_hprv_feats_bit(int feature_bit) #define IS_PV_APIC_KVM() kvm_test_hprv_feats_mask(KVM_FEAT_PV_APIC_MASK) #define IS_PV_EPIC_KVM() kvm_test_hprv_feats_mask(KVM_FEAT_PV_EPIC_MASK) +static inline unsigned long kvm_hypervisor_inject_interrupt(void) +{ + /* + * Not yet fully implemented + * The real implementation requires checking for interrupts and only + * after that call the host to inject interrupt + return HYPERVISOR_inject_interrupt(); + */ + return 0; +} #endif /* _ASM_E2K_KVM_HYPERVISOR_H */ diff --git a/arch/e2k/include/asm/kvm/mm.h b/arch/e2k/include/asm/kvm/mm.h index ef6aecd..a92ed16 100644 --- a/arch/e2k/include/asm/kvm/mm.h +++ b/arch/e2k/include/asm/kvm/mm.h @@ -57,6 +57,8 @@ typedef struct gmm_struct { cpumask_t cpu_vm_mask; /* mask of CPUs where the mm is */ /* in use or was some early */ gva_cache_t *gva_cache; /* gva -> gpa,hva cache */ + struct rhashtable *ctx_stacks; /* hash table with signal stacks */ + /* for contexts created by guest */ } gmm_struct_t; /* same as accessor for struct mm_struct's cpu_vm_mask but for guest mm */ @@ -113,7 +115,7 @@ kvm_find_gmmid(gmmid_table_t *gmmid_table, int gmmid_nr) { kvm_nid_t *nid; - nid = kvm_try_find_nid(gmmid_table, gmmid_nr, gmmid_hashfn(gmmid_nr)); + nid = kvm_find_nid(gmmid_table, gmmid_nr, gmmid_hashfn(gmmid_nr)); if (nid == NULL) return NULL; return gmmid_entry(nid); diff --git a/arch/e2k/include/asm/kvm/mmu_pte.h b/arch/e2k/include/asm/kvm/mmu_pte.h index 8a92bf6..e58dc29 100644 --- a/arch/e2k/include/asm/kvm/mmu_pte.h +++ b/arch/e2k/include/asm/kvm/mmu_pte.h @@ -33,6 +33,8 @@ #define 
PFERR_HW_ACCESS_BIT 17 #define PFERR_USER_ADDR_BIT 18 #define PFERR_ILLEGAL_PAGE_BIT 19 +#define PFERR_DONT_INJECT_BIT 20 +#define PFERR_SPEC_BIT 21 #define PFERR_ACCESS_SIZE_BIT 24 @@ -56,6 +58,8 @@ #define PFERR_HW_ACCESS_MASK (1U << PFERR_HW_ACCESS_BIT) #define PFERR_USER_ADDR_MASK (1U << PFERR_USER_ADDR_BIT) #define PFERR_ILLEGAL_PAGE_MASK (1U << PFERR_ILLEGAL_PAGE_BIT) +#define PFERR_DONT_INJECT_MASK (1U << PFERR_DONT_INJECT_BIT) +#define PFERR_SPEC_MASK (1U << PFERR_SPEC_BIT) #define PFERR_ACCESS_SIZE_MASK (~0U << PFERR_ACCESS_SIZE_BIT) diff --git a/arch/e2k/include/asm/kvm/nid.h b/arch/e2k/include/asm/kvm/nid.h index 9ca63f0..1311d62 100644 --- a/arch/e2k/include/asm/kvm/nid.h +++ b/arch/e2k/include/asm/kvm/nid.h @@ -75,21 +75,6 @@ kvm_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) return nid; } -static inline kvm_nid_t * -kvm_try_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) -{ - kvm_nid_t *nid; - unsigned long flags; - bool locked; - - locked = raw_spin_trylock_irqsave(&nid_table->nidmap_lock, flags); - nid = kvm_do_find_nid(nid_table, nid_nr, hash_index); - if (likely(locked)) { - raw_spin_unlock_irqrestore(&nid_table->nidmap_lock, flags); - } - return nid; -} - #define for_each_guest_nid_node(node, entry, next, nid_table, \ nid_hlist_member) \ for ((entry) = 0; (entry) < (nid_table)->nid_hash_size; (entry)++) \ diff --git a/arch/e2k/include/asm/kvm/proc_context_stacks.h b/arch/e2k/include/asm/kvm/proc_context_stacks.h new file mode 100644 index 0000000..d59ead5 --- /dev/null +++ b/arch/e2k/include/asm/kvm/proc_context_stacks.h @@ -0,0 +1,9 @@ +#ifndef KVM_PROC_CTXT_STACKS +#define KVM_PROC_CTXT_STACKS + +#include + +unsigned long kvm_prepare_gst_mkctxt_hw_stacks(struct kvm_vcpu *vcpu, + kvm_proc_ctxt_hw_stacks_t *hw_stacks); + +#endif /* KVM_PROC_CTXT_STACKS */ diff --git a/arch/e2k/include/asm/kvm/proc_context_types.h b/arch/e2k/include/asm/kvm/proc_context_types.h new file mode 100644 index 
0000000..3e37eb2 --- /dev/null +++ b/arch/e2k/include/asm/kvm/proc_context_types.h @@ -0,0 +1,19 @@ +#ifndef KVM_PROC_CTXT_TYPES +#define KVM_PROC_CTXT_TYPES + +#include + +#include + +typedef struct kvm_proc_ctxt_hw_stacks { + void (*user_func)(void); + void *args; + u64 args_size; + size_t d_stack_sz; + bool protected; + u64 gst_mkctxt_trampoline; + void *ps_frames; + e2k_mem_crs_t *cs_frames; +} kvm_proc_ctxt_hw_stacks_t; + +#endif /* KVM_PROC_CTXT_TYPES */ diff --git a/arch/e2k/include/asm/kvm/ptrace.h b/arch/e2k/include/asm/kvm/ptrace.h index 839dc8d..deaf813 100644 --- a/arch/e2k/include/asm/kvm/ptrace.h +++ b/arch/e2k/include/asm/kvm/ptrace.h @@ -30,7 +30,7 @@ typedef enum inject_caller { FROM_HOST_INJECT = 1 << 0, FROM_PV_VCPU_TRAP_INJECT = 1 << 1, - FROM_PV_VCPU_SYSCALL_INJECT = 1 << 2, + FROM_PV_VCPU_SYSCALL_INJECT = 1 << 2 } inject_caller_t; #ifdef CONFIG_VIRTUALIZATION @@ -588,6 +588,9 @@ check_is_user_address(struct task_struct *task, e2k_addr_t address) typedef struct pv_vcpu_ctxt { inject_caller_t inject_from; /* reason of injection */ int trap_no; /* number of recursive trap */ + int skip_frames; /* number signal stack frame to remove */ + int skip_traps; /* number of traps frames to remove */ + int skip_syscalls; /* number of syscall frames to remove */ u64 sys_rval; /* return value of guest system call */ e2k_psr_t guest_psr; /* guest PSR state before trap */ bool irq_under_upsr; /* is IRQ control under UOSR? 
*/ diff --git a/arch/e2k/include/asm/kvm/pv-emul.h b/arch/e2k/include/asm/kvm/pv-emul.h index 50eb2f2..0bacba7 100644 --- a/arch/e2k/include/asm/kvm/pv-emul.h +++ b/arch/e2k/include/asm/kvm/pv-emul.h @@ -136,6 +136,35 @@ static inline bool kvm_vcpu_in_hypercall(struct kvm_vcpu *vcpu) return vcpu->arch.sw_ctxt.in_hypercall; } +static inline void kvm_vcpu_set_dont_inject(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sw_ctxt.dont_inject = true; +} + +static inline void kvm_vcpu_reset_dont_inject(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sw_ctxt.dont_inject = false; +} + +static inline bool kvm_vcpu_test_dont_inject(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.sw_ctxt.dont_inject; +} + +static inline bool kvm_vcpu_test_and_clear_dont_inject(struct kvm_vcpu *vcpu) +{ + if (likely(!kvm_vcpu_test_dont_inject(vcpu))) + return false; + + kvm_vcpu_reset_dont_inject(vcpu); + return true; +} + +static inline bool host_test_dont_inject(pt_regs_t *regs) +{ + return host_test_intc_emul_mode(regs) && regs->dont_inject; +} + static inline void pv_vcpu_clear_gti(struct kvm_vcpu *vcpu) { if (likely(!vcpu->arch.is_hv && vcpu->arch.is_pv)) { @@ -314,6 +343,11 @@ static inline bool kvm_vcpu_in_hypercall(struct kvm_vcpu *vcpu) return false; } +static inline bool host_test_dont_inject(pt_regs_t *regs) +{ + return false; +} + #endif /* CONFIG_VIRTUALIZATION */ #endif /* ! 
__ASSEMBLY__ */ diff --git a/arch/e2k/include/asm/kvm/switch.h b/arch/e2k/include/asm/kvm/switch.h index 08d6b6e..84dfb09 100644 --- a/arch/e2k/include/asm/kvm/switch.h +++ b/arch/e2k/include/asm/kvm/switch.h @@ -1258,6 +1258,9 @@ static inline bool host_guest_syscall_enter(struct pt_regs *regs, kvm_switch_to_host_mmu_pid(vcpu, current->mm); kvm_set_intc_emul_flag(regs); + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); /* See the comment in kvm_vcpu_exiting_guest_mode() */ + return true; } diff --git a/arch/e2k/include/asm/kvm/thread_info.h b/arch/e2k/include/asm/kvm/thread_info.h index 1284da2..161d148 100644 --- a/arch/e2k/include/asm/kvm/thread_info.h +++ b/arch/e2k/include/asm/kvm/thread_info.h @@ -179,6 +179,7 @@ typedef struct gthread_info { /* on host */ /* NULL for guest kernel threads */ hpa_t nonp_root_hpa; /* physical base of nonpaging root PT */ + u64 curr_ctx_key; /* Key of curr context signal stack */ bool gmm_in_release; /* guest mm is releasing (exit_mm()) */ /* following fields should be updated for each multi-stack process */ diff --git a/arch/e2k/include/asm/kvm/trace_kvm.h b/arch/e2k/include/asm/kvm/trace_kvm.h index 04adcb2..a61a927 100644 --- a/arch/e2k/include/asm/kvm/trace_kvm.h +++ b/arch/e2k/include/asm/kvm/trace_kvm.h @@ -963,16 +963,44 @@ TRACE_EVENT( __entry->aaldi[30], __entry->aaldi[31]) ); +TRACE_EVENT(kvm_pid, + TP_PROTO(kvm_e2k_from_t from, unsigned long vmid, unsigned long vcpu_id, unsigned long pid), + TP_ARGS(from, vmid, vcpu_id, pid), + + TP_STRUCT__entry( + __field( kvm_e2k_from_t, from ) + __field( u64, vmid ) + __field( u64, vcpu_id ) + __field( u64, pid ) + ), + + TP_fast_assign( + __entry->from = from; + __entry->vmid = vmid; + __entry->vcpu_id = vcpu_id; + __entry->pid = pid; + ), + + TP_printk("%s: vmid %llu vcpu %llu mmu pid 0x%llx", + __print_symbolic(__entry->from, + { FROM_GENERIC_HYPERCALL, "generic hcall" }, + { FROM_LIGHT_HYPERCALL, "light hcall" }, + { FROM_PV_INTERCEPT, "pv intc" }, + { FROM_HV_INTERCEPT, "hv 
intc" }, + { FROM_VCPU_LOAD, "vcpu load" }, + { FROM_VCPU_PUT, "vcpu put" }), + __entry->vmid, __entry->vcpu_id, __entry->pid) +); + TRACE_EVENT( generic_hcall, TP_PROTO(unsigned long hcall_num, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, - unsigned long arg6, unsigned long gsbr, - unsigned long cpu), + unsigned long arg6, unsigned long gsbr), - TP_ARGS(hcall_num, arg1, arg2, arg3, arg4, arg5, arg6, gsbr, cpu), + TP_ARGS(hcall_num, arg1, arg2, arg3, arg4, arg5, arg6, gsbr), TP_STRUCT__entry( __field( u64, hcall_num ) @@ -983,7 +1011,6 @@ TRACE_EVENT( __field( u64, arg5 ) __field( u64, arg6 ) __field( u64, gsbr ) - __field( u64, cpu ) ), TP_fast_assign( @@ -995,13 +1022,11 @@ TRACE_EVENT( __entry->arg5 = arg5; __entry->arg6 = arg6; __entry->gsbr = gsbr; - __entry->cpu = cpu; ), - TP_printk("CPU#%llu, generic hypercall %llu\n" + TP_printk("nr %llu\n" "Args: 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx; gsbr: 0x%llx" , - __entry->cpu, __entry->hcall_num, __entry->arg1, __entry->arg2, @@ -1018,9 +1043,9 @@ TRACE_EVENT( TP_PROTO(unsigned long hcall_num, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, - unsigned long arg6, unsigned long cpu), + unsigned long arg6), - TP_ARGS(hcall_num, arg1, arg2, arg3, arg4, arg5, arg6, cpu), + TP_ARGS(hcall_num, arg1, arg2, arg3, arg4, arg5, arg6), TP_STRUCT__entry( __field( u64, hcall_num ) @@ -1030,7 +1055,6 @@ TRACE_EVENT( __field( u64, arg4 ) __field( u64, arg5 ) __field( u64, arg6 ) - __field( u64, cpu ) ), TP_fast_assign( @@ -1041,13 +1065,11 @@ TRACE_EVENT( __entry->arg4 = arg4; __entry->arg5 = arg5; __entry->arg6 = arg6; - __entry->cpu = cpu; ), - TP_printk("CPU#%llu, light hypercall %llu\n" + TP_printk("nr %llu\n" "Args: 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx" , - __entry->cpu, __entry->hcall_num, __entry->arg1, __entry->arg2, diff --git a/arch/e2k/include/asm/kvm/trace_kvm_hv.h 
b/arch/e2k/include/asm/kvm/trace_kvm_hv.h index a424749..d03b449 100644 --- a/arch/e2k/include/asm/kvm/trace_kvm_hv.h +++ b/arch/e2k/include/asm/kvm/trace_kvm_hv.h @@ -212,26 +212,6 @@ TRACE_EVENT( )) ); -TRACE_EVENT( - intc_exit, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("Intercept exit %s(%d)", - (__entry->ret) ? "to QEMU " : "", - __entry->ret) -); - TRACE_EVENT( intc_stacks, diff --git a/arch/e2k/include/asm/kvm/trap_table.h b/arch/e2k/include/asm/kvm/trap_table.h index ffc39b9..90af49d 100644 --- a/arch/e2k/include/asm/kvm/trap_table.h +++ b/arch/e2k/include/asm/kvm/trap_table.h @@ -157,6 +157,11 @@ is_guest_TIRs_frozen(struct pt_regs *regs) return false; /* none any guest */ } +static inline bool is_injected_guest_coredump(struct pt_regs *regs) +{ + return false; /* none any guest */ +} + static inline bool handle_guest_last_wish(struct pt_regs *regs) { @@ -273,6 +278,7 @@ extern unsigned long kvm_pass_virqs_to_guest(struct pt_regs *regs, unsigned long TIR_hi, unsigned long TIR_lo); extern unsigned long kvm_pass_coredump_trap_to_guest(struct kvm_vcpu *vcpu, struct pt_regs *regs); +extern void kvm_pass_coredump_to_all_vm(struct pt_regs *regs); extern unsigned long kvm_pass_clw_fault_to_guest(struct pt_regs *regs, trap_cellar_t *tcellar); extern unsigned long kvm_pass_page_fault_to_guest(struct pt_regs *regs, @@ -283,12 +289,18 @@ extern int do_hret_last_wish_intc(struct kvm_vcpu *vcpu, struct pt_regs *regs); extern void trap_handler_trampoline(void); extern void syscall_handler_trampoline(void); +extern void host_mkctxt_trampoline(void); +extern void return_pv_vcpu_from_mkctxt(void); extern void trap_handler_trampoline_continue(void); extern void syscall_handler_trampoline_continue(u64 sys_rval); +extern void host_mkctxt_trampoline_continue(void); +extern void return_pv_vcpu_from_mkctxt_continue(void); extern void syscall_fork_trampoline(void); extern 
void syscall_fork_trampoline_continue(u64 sys_rval); extern notrace long return_pv_vcpu_trap(void); extern notrace long return_pv_vcpu_syscall(void); +extern notrace void pv_vcpu_mkctxt_trampoline_inject(void); +extern notrace void pv_vcpu_mkctxt_complete(void); static __always_inline void kvm_init_guest_traps_handling(struct pt_regs *regs, bool user_mode_trap) @@ -542,6 +554,11 @@ is_guest_TIRs_frozen(struct pt_regs *regs) return kvm_is_guest_TIRs_frozen(regs); } +static inline bool is_injected_guest_coredump(struct pt_regs *regs) +{ + return regs->traps_to_guest == core_dump_mask; +} + static inline bool handle_guest_last_wish(struct pt_regs *regs) { @@ -700,12 +717,13 @@ pass_coredump_trap_to_guest(struct pt_regs *regs) { struct kvm_vcpu *vcpu; - if (!kvm_test_intc_emul_flag(regs)) + if (!kvm_test_intc_emul_flag(regs)) { + kvm_pass_coredump_to_all_vm(regs); return 0; + } vcpu = current_thread_info()->vcpu; - return kvm_pass_coredump_trap_to_guest(vcpu, regs); } diff --git a/arch/e2k/include/asm/kvm/uaccess.h b/arch/e2k/include/asm/kvm/uaccess.h index 1b2fe61..263b8bf 100644 --- a/arch/e2k/include/asm/kvm/uaccess.h +++ b/arch/e2k/include/asm/kvm/uaccess.h @@ -113,7 +113,7 @@ native_copy_from_user_with_tags(void *to, const void __user *from, (res); \ }) -#define __kvm_get_guest_atomic(__slot, gfn, __hk_ptr, offset, \ +#define __kvm_get_guest(__slot, gfn, __hk_ptr, offset, \ gk_ptrp, __writable) \ ({ \ __typeof__(__hk_ptr) __user *gk_ptr; \ @@ -126,12 +126,12 @@ native_copy_from_user_with_tags(void *to, const void __user *from, } else { \ gk_ptr = (__typeof__((__hk_ptr)) *)(addr + offset); \ gk_ptrp = gk_ptr; \ - r = native_get_user((__hk_ptr), gk_ptr); \ + r = __get_user((__hk_ptr), gk_ptr); \ } \ r; \ }) -#define kvm_get_guest_atomic(kvm, gpa, _hk_ptr) \ +#define kvm_get_guest(kvm, gpa, _hk_ptr) \ ({ \ gfn_t gfn = (gpa) >> PAGE_SHIFT; \ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); \ @@ -139,12 +139,10 @@ native_copy_from_user_with_tags(void *to, const 
void __user *from, __typeof__(_hk_ptr) __user *unused; \ int r; \ \ - __kvm_get_guest_atomic(slot, gfn, (_hk_ptr), offset, \ - unused, NULL); \ + __kvm_get_guest(slot, gfn, (_hk_ptr), offset, unused, NULL); \ }) -#define kvm_vcpu_get_guest_ptr_atomic(vcpu, gpa, _hk_ptr, \ - _gk_ptrp, _writable) \ +#define kvm_vcpu_get_guest_ptr(vcpu, gpa, _hk_ptr, _gk_ptrp, _writable) \ ({ \ gfn_t gfn = (gpa) >> PAGE_SHIFT; \ struct kvm_memory_slot *slot; \ @@ -152,16 +150,37 @@ native_copy_from_user_with_tags(void *to, const void __user *from, int r; \ \ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); \ - r = __kvm_get_guest_atomic(slot, gfn, (_hk_ptr), offset, \ + r = __kvm_get_guest(slot, gfn, (_hk_ptr), offset, \ _gk_ptrp, _writable); \ r; \ }) -#define kvm_vcpu_get_guest_atomic(vcpu, gpa, ___hk_ptr) \ +#define kvm_vcpu_get_guest(vcpu, gpa, ___hk_ptr) \ ({ \ __typeof__(___hk_ptr) __user *unused; \ \ - kvm_vcpu_get_guest_ptr_atomic(vcpu, gpa, ___hk_ptr, \ - unused, NULL); \ + kvm_vcpu_get_guest_ptr(vcpu, gpa, ___hk_ptr, unused, NULL); \ +}) + +#define kvm_get_guest_atomic(kvm, gpa, __hk_ptr) \ +({ \ + __typeof__(__hk_ptr) __user *gk_ptr; \ + gfn_t gfn = (gpa) >> PAGE_SHIFT; \ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); \ + int offset = offset_in_page(gpa); \ + bool writable; \ + unsigned long addr; \ + int r; \ + \ + addr = gfn_to_hva_memslot_prot(slot, gfn, &writable); \ + if (unlikely(kvm_is_error_hva(addr))) { \ + r = -EFAULT; \ + } else { \ + gk_ptr = (__typeof__((__hk_ptr)) *)(addr + offset); \ + pagefault_disable(); \ + r = native_get_user((__hk_ptr), gk_ptr); \ + pagefault_enable(); \ + } \ + r; \ }) extern unsigned long kvm_copy_in_user_with_tags(void __user *to, diff --git a/arch/e2k/include/asm/kvm_host.h b/arch/e2k/include/asm/kvm_host.h index fd04dcf..a2add91 100644 --- a/arch/e2k/include/asm/kvm_host.h +++ b/arch/e2k/include/asm/kvm_host.h @@ -414,6 +414,9 @@ typedef enum pf_res { PFRES_RETRY, /* page fault is not handled and can */ /* be retried on guest 
or should be handled */ /* from begining by hypervisor */ + PFRES_RETRY_MEM, /* not enough memory to handle */ + PFRES_DONT_INJECT, /* page fault should be injected to the guest */ + /* but injection is prohibited */ } pf_res_t; struct kvm_arch_exception; @@ -561,7 +564,7 @@ typedef struct kvm_mmu { gw_attr_t *gw_res); void (*update_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, pgprot_t *spte, const void *pte); - void (*sync_gva)(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, gva_t gva); + int (*sync_gva)(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, gva_t gva); long (*sync_gva_range)(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, gva_t gva_start, gva_t gva_end); int (*sync_page)(struct kvm_vcpu *vcpu, kvm_mmu_page_t *sp); @@ -782,6 +785,7 @@ typedef struct kvm_sw_cpu_context { int osem; bool in_hypercall; bool in_fast_syscall; + bool dont_inject; e2k_usd_lo_t usd_lo; e2k_usd_hi_t usd_hi; @@ -1224,6 +1228,8 @@ struct kvm_vcpu_arch { int node_id; int hard_cpu_id; + + u64 gst_mkctxt_trampoline; }; typedef struct kvm_lpage_info { @@ -1257,6 +1263,7 @@ typedef struct kvm_arch_memory_slot { #define KVM_REQ_VIRQS_INJECTED 22 /* pending VIRQs injected */ #define KVM_REQ_SCAN_IOAPIC 23 /* scan IO-APIC */ #define KVM_REQ_SCAN_IOEPIC 24 /* scan IO-EPIC */ +#define KVM_REQ_TO_COREDUMP 25 /* pending coredump request */ #define kvm_set_pending_virqs(vcpu) \ set_bit(KVM_REQ_PENDING_VIRQS, (void *)&vcpu->requests) @@ -1283,6 +1290,14 @@ do { \ if (test_and_clear_bit(KVM_REG_SHOW_STATE, (void *)&vcpu->requests)) \ wake_up_bit((void *)&vcpu->requests, KVM_REG_SHOW_STATE); \ } while (false) +#define kvm_set_request_to_coredump(vcpu) \ + kvm_make_request(KVM_REQ_TO_COREDUMP, vcpu) +#define kvm_clear_request_to_coredump(vcpu) \ + kvm_clear_request(KVM_REQ_TO_COREDUMP, vcpu) +#define kvm_test_request_to_coredump(vcpu) \ + kvm_test_request(KVM_REQ_TO_COREDUMP, vcpu) +#define kvm_test_and_clear_request_to_coredump(vcpu) \ + kvm_check_request(KVM_REQ_TO_COREDUMP, vcpu) struct 
kvm_irq_mask_notifier { void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); @@ -1320,7 +1335,6 @@ struct kvm_arch { bool tdp_enable; /* two dimensional paging is supported */ /* by hardware MMU and hypervisor */ bool shadow_pt_set_up; /* shadow PT was set up, skip setup on other VCPUs */ - struct mutex spt_sync_lock; atomic_t vcpus_to_reset; /* atomic counter of VCPUs ready to reset */ kvm_mem_alias_t aliases[KVM_ALIAS_SLOTS]; kvm_kernel_shadow_t shadows[KVM_SHADOW_SLOTS]; @@ -1628,4 +1642,13 @@ static inline void kvm_epic_stop_idle_timer(struct kvm_vcpu *vcpu) { } extern struct work_struct kvm_dump_stacks; extern void wait_for_print_all_guest_stacks(struct work_struct *work); +typedef enum kvm_e2k_from { + FROM_GENERIC_HYPERCALL, + FROM_LIGHT_HYPERCALL, + FROM_PV_INTERCEPT, + FROM_HV_INTERCEPT, + FROM_VCPU_LOAD, + FROM_VCPU_PUT +} kvm_e2k_from_t; + #endif /* _ASM_E2K_KVM_HOST_H */ diff --git a/arch/e2k/include/asm/machdep.h b/arch/e2k/include/asm/machdep.h index 33537ff..5f8f123 100644 --- a/arch/e2k/include/asm/machdep.h +++ b/arch/e2k/include/asm/machdep.h @@ -421,6 +421,25 @@ CPUHAS(CPU_HWBUG_C3, !IS_ENABLED(CONFIG_CPU_E16C), false, cpu == IDR_E16C_MDL && revision == 0); +/* #130291 - HRET does not clean INTC_INFO_CU/INTC_PTR_CU. 
+ * Workaround - clean it before each HRET */ +CPUHAS(CPU_HWBUG_HRET_INTC_CU, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_E2C3) || IS_ENABLED(CONFIG_CPU_E12C) || + IS_ENABLED(CONFIG_CPU_E16C), + cpu == IDR_E2C3_MDL || cpu == IDR_E12C_MDL || + cpu == IDR_E16C_MDL); +/* #137536 - intercept (or interrupt), after writing CR, while hardware is + * waiting for fill CF may corrupt all other CRs + * Workaround - add wait ma_c=1 to the same instruction, as CR write + * This workaround covers most possible cases, but not all of them */ +CPUHAS(CPU_HWBUG_INTC_CR_WRITE, + !IS_ENABLED(CONFIG_CPU_E16C) && + !IS_ENABLED(CONFIG_CPU_E2C3), + false, + (cpu == IDR_E16C_MDL && revision == 0 || + cpu == IDR_E2C3_MDL && revision == 0) && + is_hardware_guest); /* * Not bugs but features go here diff --git a/arch/e2k/include/asm/mmu.h b/arch/e2k/include/asm/mmu.h index e9f4bce..66873ee 100644 --- a/arch/e2k/include/asm/mmu.h +++ b/arch/e2k/include/asm/mmu.h @@ -161,6 +161,10 @@ typedef struct { size_t cached_stacks_size; } mm_context_t; +#define INIT_MM_CONTEXT(mm) \ + .context = { \ + .cut_mask_lock = __MUTEX_INITIALIZER(mm.context.cut_mask_lock), \ + } \ /* Version for fast syscalls, so it must be inlined. * Must be used only for current. 
*/ diff --git a/arch/e2k/include/asm/native_cpu_regs_access.h b/arch/e2k/include/asm/native_cpu_regs_access.h index 9a6f2ef..98b95d9 100644 --- a/arch/e2k/include/asm/native_cpu_regs_access.h +++ b/arch/e2k/include/asm/native_cpu_regs_access.h @@ -160,13 +160,13 @@ #define NATIVE_NV_READ_CR1_HI_REG_VALUE() NATIVE_GET_DSREG_OPEN(cr1.hi) #define NATIVE_NV_NOIRQ_WRITE_CR0_LO_REG_VALUE(CR0_lo_value) \ - NATIVE_SET_DSREG_OPEN_NOIRQ(cr0.lo, CR0_lo_value) + NATIVE_SET_CR_CLOSED_NOEXC(cr0.lo, CR0_lo_value) #define NATIVE_NV_NOIRQ_WRITE_CR0_HI_REG_VALUE(CR0_hi_value) \ - NATIVE_SET_DSREG_OPEN_NOIRQ(cr0.hi, CR0_hi_value) + NATIVE_SET_CR_CLOSED_NOEXC(cr0.hi, CR0_hi_value) #define NATIVE_NV_NOIRQ_WRITE_CR1_LO_REG_VALUE(CR1_lo_value) \ - NATIVE_SET_DSREG_OPEN_NOIRQ(cr1.lo, CR1_lo_value) + NATIVE_SET_CR_CLOSED_NOEXC(cr1.lo, CR1_lo_value) #define NATIVE_NV_NOIRQ_WRITE_CR1_HI_REG_VALUE(CR1_hi_value) \ - NATIVE_SET_DSREG_OPEN_NOIRQ(cr1.hi, CR1_hi_value) + NATIVE_SET_CR_CLOSED_NOEXC(cr1.hi, CR1_hi_value) /* * Read/write word Procedure Chain Stack Harware Top Pointer (PCSHTP) diff --git a/arch/e2k/include/asm/page_io.h b/arch/e2k/include/asm/page_io.h index 75d88f4..5533c7c 100644 --- a/arch/e2k/include/asm/page_io.h +++ b/arch/e2k/include/asm/page_io.h @@ -45,7 +45,7 @@ extern void tags_swap_init(unsigned type, unsigned long *map); extern void e2k_remove_swap(struct swap_info_struct *sis); extern void restore_tags_for_data(u64 *, u8 *); extern u32 save_tags_from_data(u64 *, u8 *); -extern void get_page_with_tags(u8 *, u8 *, int *); +extern void get_page_with_tags(u8 **, u8 *, int *); extern u8 *alloc_page_with_tags(void); extern void free_page_with_tags(u8 *); extern int check_tags(unsigned type, unsigned long beg, unsigned long end); diff --git a/arch/e2k/include/asm/preempt.h b/arch/e2k/include/asm/preempt.h index 6269381..5c77e66 100644 --- a/arch/e2k/include/asm/preempt.h +++ b/arch/e2k/include/asm/preempt.h @@ -72,6 +72,9 @@ static __always_inline bool 
__preempt_count_dec_and_test(void) E2K_SUBD_ATOMIC__SHRD32(__cpu_preempt_reg, 1ull << PREEMPT_COUNTER_SHIFT, old); #ifdef CONFIG_PREEMPT_LAZY + if (unlikely(old == 1)) + return true; + /* preempt count == 0 ? */ if ((__cpu_preempt_reg >> 32ull) & ~1ull) /* as in arm64 */ return false; if (current_thread_info()->preempt_lazy_count) @@ -99,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset) u64 tmp_par = (u64) (u32) preempt_offset << 1; u64 tmp = __cpu_preempt_reg >> 32ull; - if (tmp == tmp_par) + if (unlikely(tmp == (tmp_par | 1))) return true; /* preempt count == 0 ? */ diff --git a/arch/e2k/include/asm/proc_context_stacks.h b/arch/e2k/include/asm/proc_context_stacks.h new file mode 100644 index 0000000..0dbd802 --- /dev/null +++ b/arch/e2k/include/asm/proc_context_stacks.h @@ -0,0 +1,33 @@ +#ifndef PROC_CTXT_STACKS +#define PROC_CTXT_STACKS + +#include + +#include + +int native_mkctxt_prepare_hw_user_stacks(void (*user_func)(void), void *args, + u64 args_size, size_t d_stack_sz, + bool protected, void *ps_frames, + e2k_mem_crs_t *cs_frames); + +#if defined(CONFIG_PARAVIRT_GUEST) +/* TODO: paravirtualized host/guest kernel */ +#elif defined(CONFIG_KVM_GUEST_KERNEL) +/* it is native guest kernel */ +#include +#else /* ! CONFIG_PARAVIRT_GUEST && ! CONFIG_KVM_GUEST_KERNEL */ +/* it is native kernel with or without virtualization support */ + +static inline int mkctxt_prepare_hw_user_stacks(void (*user_func)(void), + void *args, u64 args_size, size_t d_stack_sz, + bool protected, void *ps_frames, + e2k_mem_crs_t *cs_frames) +{ + return native_mkctxt_prepare_hw_user_stacks(user_func, args, args_size, + d_stack_sz, protected, ps_frames, + cs_frames); +} + +#endif /* ! CONFIG_PARAVIRT_GUEST && ! 
CONFIG_KVM_GUEST_KERNEL */ + +#endif /* PROC_CTXT_STACKS */ diff --git a/arch/e2k/include/asm/processor.h b/arch/e2k/include/asm/processor.h index bfc81c5..6a798cd 100644 --- a/arch/e2k/include/asm/processor.h +++ b/arch/e2k/include/asm/processor.h @@ -182,9 +182,6 @@ typedef struct thread_struct { #define INIT_THREAD { 0 } -#define INIT_MMAP \ -{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } - extern void start_thread(struct pt_regs *regs, unsigned long entry, unsigned long sp); extern int native_do_prepare_start_thread_frames(unsigned long entry, diff --git a/arch/e2k/include/asm/ptrace.h b/arch/e2k/include/asm/ptrace.h index 700cf96..94fcef1 100644 --- a/arch/e2k/include/asm/ptrace.h +++ b/arch/e2k/include/asm/ptrace.h @@ -196,6 +196,8 @@ typedef struct pt_regs { bool need_inject; /* flag for unconditional injection */ /* trap to guest to avoid acces to */ /* guest user space in trap context */ + bool dont_inject; /* page fault injection to the guest */ + /* is prohibited */ bool in_hypercall; /* trap is occured in hypercall */ bool is_guest_user; /* trap/system call on/from guest */ /* user */ @@ -801,7 +803,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); #else #define profile_pc(regs) instruction_pointer(regs) #endif -extern void show_regs(struct pt_regs *); extern int syscall_trace_entry(struct pt_regs *regs); extern void syscall_trace_leave(struct pt_regs *regs); diff --git a/arch/e2k/include/asm/regs_state.h b/arch/e2k/include/asm/regs_state.h index ceeeda1..056be93 100644 --- a/arch/e2k/include/asm/regs_state.h +++ b/arch/e2k/include/asm/regs_state.h @@ -93,11 +93,6 @@ static inline void native_set_kernel_CUTD(void) NATIVE_NV_NOIRQ_WRITE_CUTD_REG(k_cutd); } -#define NATIVE_CLEAR_DAM \ -({ \ - NATIVE_SET_MMUREG(dam_inv, 0); \ -}) - /* * Macros to save and restore registers. 
*/ @@ -538,6 +533,8 @@ do { \ E2K_SET_GREGS_TO_THREAD(gbase, g_u, gt_u); \ }) +#define NATIVE_CLEAR_DAM NATIVE_SET_MMUREG(dam_inv, 0) + #if defined(CONFIG_PARAVIRT_GUEST) #include #elif defined(CONFIG_KVM_GUEST_KERNEL) @@ -550,6 +547,8 @@ do { \ #define SET_GREGS_TO_THREAD(gbase, g_user, gtag_user) \ NATIVE_SET_GREGS_TO_THREAD(gbase, g_user, gtag_user) +#define CLEAR_DAM NATIVE_CLEAR_DAM + #endif /* !CONFIG_PARAVIRT_GUEST && !CONFIG_KVM_GUEST_KERNEL */ #else /* E2K_MAXGR_d != 32 */ diff --git a/arch/e2k/include/asm/sclkr.h b/arch/e2k/include/asm/sclkr.h index 3964912..0649a4f 100644 --- a/arch/e2k/include/asm/sclkr.h +++ b/arch/e2k/include/asm/sclkr.h @@ -48,24 +48,15 @@ DECLARE_PER_CPU(int, ema_freq); extern __interrupt u64 fast_syscall_read_sclkr(void); extern struct clocksource *curr_clocksource; extern int redpill; +extern int check_sclkr_monotonic; #define xchg_prev_sclkr_res(res) \ __api_atomic64_fetch_xchg_if_below(res, &prev_sclkr.res.counter, RELAXED_MB) -#define SHF_ALPHA 2 -static __always_inline u64 sclkr_to_ns(u64 sclkr, u64 freq) +static __always_inline u64 _sclkr_to_ns(u64 sclkr, u64 freq) { - u64 sclkr_sec, sclkr_lo, res, before; + u64 sclkr_sec, sclkr_lo, res; e2k_sclkm1_t sclkm1 = READ_SCLKM1_REG(); -#ifdef CONFIG_SMP - struct thread_info *ti = READ_CURRENT_REG(); - struct task_struct *task = (void *) ti - - offsetof(struct task_struct, thread_info); - typeof(ema_freq) *pema_freq = per_cpu_ptr(&ema_freq, task->cpu); -#else - typeof(ema_freq) *pema_freq = - (typeof(ema_freq) *)per_cpu_ptr(&ema_freq, 0); -#endif /* we can not use __this_cpu_read/write(ema_freq) in fast syscall : */ sclkr_sec = sclkr >> 32; @@ -74,29 +65,37 @@ static __always_inline u64 sclkr_to_ns(u64 sclkr, u64 freq) if (sclkr_lo >= freq) sclkr_lo = freq - 1; - /* Using exponential moving average (ema) of frequency - * ema = alpha * cur_freq + (1 - alpha) * ema; - * makes got time more smooth but belated frequency is used - * shorter: ema = ema + (cur_freq - ema) * alpha; - * alpha 
= 2 / (period + 1) - * if moving average period = 3 alpha = 1/2 or use SHF_ALPHA = 1 - * if moving average period = 7 alpha = 1/4 or use SHF_ALPHA = 2 - * - * 1 << (SHF_ALPHA - 1) is added for rounding. - */ - *pema_freq += (freq - *pema_freq + (1 << (SHF_ALPHA - 1))) >> SHF_ALPHA; - res = sclkr_sec * NSEC_PER_SEC + sclkr_lo * NSEC_PER_SEC / *pema_freq; + res = sclkr_sec * NSEC_PER_SEC + sclkr_lo * NSEC_PER_SEC / freq; /* sclkm3 has a summary time when guest was out of cpu */ if (!redpill && sclkm1.sclkm3) res -= READ_SCLKM3_REG(); - before = xchg_prev_sclkr_res(res); - if (before > res) - res = before; + return res; +} + +static __always_inline u64 sclkr_to_ns(u64 sclkr, u64 freq) +{ + u64 res, before; + res = _sclkr_to_ns(sclkr, freq); + if (check_sclkr_monotonic) { + before = xchg_prev_sclkr_res(res); + if (before > res) + res = before; + } return res; } +static __always_inline u64 sclkr2ns(u64 sclkr, u64 freq) +{ +/* Do not check monotonic as kernel/sched/clock.c says: + * cpu_clock(i) provides a fast (execution time) high resolution + * clock with bounded drift between CPUs. The value of cpu_clock(i) + * is monotonic for constant i. 
+*/ + return _sclkr_to_ns(sclkr, freq); +} + static inline bool use_sclkr_sched_clock(void) { return sclkr_initialized; diff --git a/arch/e2k/include/asm/signal.h b/arch/e2k/include/asm/signal.h index 919b628..b071fbf 100644 --- a/arch/e2k/include/asm/signal.h +++ b/arch/e2k/include/asm/signal.h @@ -90,40 +90,6 @@ struct as_sa_handler_arg; #define ptrace_signal_deliver() do { } while (0) -#define DO_SDBGPRINT(message) \ -do { \ - e2k_tir_lo_t tir_lo; \ - void *cr_ip, *tir_ip; \ - \ - tir_lo.TIR_lo_reg = (regs)->trap->TIR_lo; \ - \ - tir_ip = (void *)tir_lo.TIR_lo_ip; \ - cr_ip = (void *)GET_IP_CR0_HI((regs)->crs.cr0_hi); \ - \ - if (tir_ip == cr_ip) \ - pr_info("%s: IP=%px %s(pid=%d)\n", \ - message, tir_ip, current->comm, \ - current->pid); \ - else \ - pr_info("%s: IP=%px(interrupt IP=%px) %s(pid=%d)\n", \ - message, tir_ip, cr_ip, current->comm, \ - current->pid); \ -} while (false) - -#define SDBGPRINT(message) \ -do { \ - if (debug_signal) \ - DO_SDBGPRINT(message); \ -} while (0) - -#define SDBGPRINT_WITH_STACK(message) \ -do { \ - if (debug_signal) { \ - DO_SDBGPRINT(message); \ - dump_stack(); \ - } \ -} while (0) - struct signal_stack; extern unsigned long allocate_signal_stack(unsigned long size); extern void free_signal_stack(struct signal_stack *signal_stack); @@ -133,7 +99,10 @@ extern struct signal_stack_context __user * pop_the_signal_stack(struct signal_stack *signal_stack); extern struct signal_stack_context __user *pop_signal_stack(void); extern struct signal_stack_context __user *get_signal_stack(void); +extern struct signal_stack_context __user * + get_prev_signal_stack(struct signal_stack_context __user *context); extern int setup_signal_stack(struct pt_regs *regs, bool is_signal); +extern int reserve_signal_stack(void); #define GET_SIG_RESTORE_STACK(ti, sbr, usd_lo, usd_hi) \ do { \ @@ -160,12 +129,30 @@ extern int native_signal_setup(struct pt_regs *regs); extern int native_longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, struct 
pt_regs *new_regs); -static inline int native_complete_long_jump(struct pt_regs *regs) +static inline int native_complete_long_jump(void) { /* nithing to do for native kernel & host */ return 0; } +static inline void native_update_kernel_crs(e2k_mem_crs_t *k_crs, + e2k_mem_crs_t *crs, e2k_mem_crs_t *prev_crs, + e2k_mem_crs_t *p_prev_crs) +{ + *p_prev_crs = k_crs[0]; + k_crs[0] = *prev_crs; + k_crs[1] = *crs; +} + +static inline int native_add_ctx_signal_stack(u64 key, bool is_main) +{ + return 0; +} + +static inline void native_remove_ctx_signal_stack(u64 key) +{ +} + extern long do_sigreturn(void); extern void sighandler_trampoline(void); extern void sighandler_trampoline_continue(void); @@ -213,10 +200,25 @@ static inline int longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, return native_longjmp_copy_user_to_kernel_hw_stacks(regs, new_regs); } -static inline int complete_long_jump(struct pt_regs *regs) +static inline int complete_long_jump(struct pt_regs *regs, bool switch_stack, + u64 to_key) { - return native_complete_long_jump(regs); + return native_complete_long_jump(); } +static inline void update_kernel_crs(e2k_mem_crs_t *k_crs, e2k_mem_crs_t *crs, + e2k_mem_crs_t *prev_crs, e2k_mem_crs_t *p_prev_crs) +{ + native_update_kernel_crs(k_crs, crs, prev_crs, p_prev_crs); +} +static inline int add_ctx_signal_stack(u64 key, bool is_main) +{ + return native_add_ctx_signal_stack(key, is_main); +} +static inline void remove_ctx_signal_stack(u64 key) +{ + native_remove_ctx_signal_stack(key); +} + #endif /* CONFIG_KVM_GUEST_KERNEL */ diff --git a/arch/e2k/include/asm/syscall.h b/arch/e2k/include/asm/syscall.h index 1126378..599bdef 100644 --- a/arch/e2k/include/asm/syscall.h +++ b/arch/e2k/include/asm/syscall.h @@ -21,7 +21,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->sys_rval = val; + regs->sys_rval = (long) error ?: val; } static inline void syscall_get_arguments(struct 
task_struct *task, diff --git a/arch/e2k/include/asm/tlbflush.h b/arch/e2k/include/asm/tlbflush.h index 7253fdb..ddc7786 100644 --- a/arch/e2k/include/asm/tlbflush.h +++ b/arch/e2k/include/asm/tlbflush.h @@ -69,11 +69,51 @@ extern void native_flush_tlb_all(void); native_flush_tlb_mm_range((mm), (start), (end), \ PMD_SIZE, FLUSH_TLB_LEVELS_LAST) +/* + * Signal to all users of this mm that it has been flushed. + * Invalid context will be updated while activating or switching to. + * + * Things to consider: + * + * 1) Clearing the whole context for CPUs to which we send the flush + * ipi looks unnecessary, but is needed to avoid race conditions. The + * problem is that there is a window between reading mm_cpumask() and + * deciding which context should be set to 0. In that window situation + * could have changed, so the only safe way is to set mm context on + * ALL cpus to 0. + * + * 2) Setting it to 0 essentially means that the cpus which receive the + * flush ipis cannot flush only a range of pages because they do not + * know the context, so they will flush the whole mm. + */ +static inline void clear_mm_remote_context(mm_context_t *context, int cpu) +{ + int i; + +#pragma loop count (NR_CPUS) + for (i = 0; i < nr_cpu_ids; i++) { + if (i == cpu) + /* That being said, current CPU can still + * flush only the given range of pages. 
*/ + continue; + context->cpumsk[i] = 0; + } +} + static inline void native_flush_tlb_kernel_range(unsigned long start, unsigned long end) { native_flush_tlb_all(); } +extern void mmu_pid_flush_tlb_mm(mm_context_t *context, bool is_active, + cpumask_t *mm_cpumask, int cpu, bool trace_enabled); +extern void mmu_pid_flush_tlb_page(mm_context_t *context, bool is_active, + cpumask_t *mm_cpumask, unsigned long addr, int cpu, + bool trace_enabled); +extern void mmu_pid_flush_tlb_range(mm_context_t *context, bool is_active, + cpumask_t *mm_cpumask, unsigned long start, unsigned long end, + unsigned long stride, u32 levels_mask, int cpu, bool trace_enabled); + extern void generic_local_flush_tlb_mm_range(struct mm_struct *mm, mm_context_t *context, cpumask_t *mm_cpumask, unsigned long start, unsigned long end, diff --git a/arch/e2k/include/asm/trap_table.h b/arch/e2k/include/asm/trap_table.h index e8da9b1..1864fb2 100644 --- a/arch/e2k/include/asm/trap_table.h +++ b/arch/e2k/include/asm/trap_table.h @@ -43,6 +43,7 @@ is_gdb_breakpoint_trap(struct pt_regs *regs) } extern void kernel_stack_overflow(unsigned int overflows); +extern void kernel_data_stack_overflow(void); static inline void native_clear_fork_child_pt_regs(struct pt_regs *childregs) { diff --git a/arch/e2k/include/asm/traps.h b/arch/e2k/include/asm/traps.h index d9602ca..73eb8af 100644 --- a/arch/e2k/include/asm/traps.h +++ b/arch/e2k/include/asm/traps.h @@ -55,6 +55,7 @@ extern void do_trap_cellar(struct pt_regs *regs, int only_system_tc); extern irqreturn_t native_do_interrupt(struct pt_regs *regs); extern void do_nm_interrupt(struct pt_regs *regs); +extern void do_mem_error(struct pt_regs *regs); extern void native_instr_page_fault(struct pt_regs *regs, tc_fault_type_t ftype, const int async_instr); @@ -224,8 +225,8 @@ extern int apply_psp_delta_to_signal_stack(unsigned long base, extern int apply_pcsp_delta_to_signal_stack(unsigned long base, unsigned long size, unsigned long start, unsigned long end, 
unsigned long delta); -extern int apply_usd_delta_to_signal_stack(unsigned long top, - unsigned long delta, bool incr); +extern int apply_usd_delta_to_signal_stack(unsigned long top, unsigned long delta, + bool incr, unsigned long *chain_stack_border); static inline int host_apply_psp_delta_to_signal_stack(unsigned long base, unsigned long size, unsigned long start, diff --git a/arch/e2k/include/asm/vga.h b/arch/e2k/include/asm/vga.h index 7c0324f..09e0763 100644 --- a/arch/e2k/include/asm/vga.h +++ b/arch/e2k/include/asm/vga.h @@ -83,4 +83,15 @@ static inline u8 vga_readb(volatile const u8 *addr) } #endif /* CONFIG_KVM_GUEST_KERNEL */ +/* + * Our drivers doens't use VGA legacy resources so + * we assume we can't have any conflicts + */ +#define __ARCH_HAS_VGA_CONFLICT +struct pci_dev; +static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2) +{ + return 0; +} + #endif diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index c716ea8..46294ef 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -495,6 +495,38 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm } #endif +/* + * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, + * and set corresponding cleared_*. 
+ */ +static inline void tlb_flush_pte_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_ptes = 1; +} + +static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_pmds = 1; +} + +static inline void tlb_flush_pud_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_puds = 1; +} + +static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_p4ds = 1; +} + #ifndef __tlb_remove_tlb_entry #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #endif @@ -508,19 +540,17 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->cleared_ptes = 1; \ + tlb_flush_pte_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ - __tlb_adjust_range(tlb, address, _sz); \ if (_sz == PMD_SIZE) \ - tlb->cleared_pmds = 1; \ + tlb_flush_pmd_range(tlb, address, _sz); \ else if (_sz == PUD_SIZE) \ - tlb->cleared_puds = 1; \ + tlb_flush_pud_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) @@ -534,8 +564,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ - tlb->cleared_pmds = 1; \ + tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -549,8 +578,7 @@ static inline void 
tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ - tlb->cleared_puds = 1; \ + tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -575,9 +603,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ - tlb->cleared_pmds = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -585,9 +612,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ - tlb->cleared_puds = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif @@ -596,9 +622,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ - tlb->cleared_p4ds = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif diff --git a/include/linux/console.h b/include/linux/console.h index 39d1bb4..749b689 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -156,6 +156,8 @@ struct console { int cflag; unsigned long printk_seq; int wrote_history; + uint ispeed; + uint ospeed; void *data; struct console *next; }; diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 5576038..2d044cc 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -58,7 +58,7 @@ static inline int elf_core_copy_task_xfpregs(struct 
task_struct *t, elf_fpxregse } #endif -#if defined(CONFIG_UM) || defined(CONFIG_IA64) || defined(CONFIG_E2K) +#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) || defined(CONFIG_E2K) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its diff --git a/include/linux/filter.h b/include/linux/filter.h index 6976320..e83b6db 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -973,6 +973,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/include/linux/hid.h b/include/linux/hid.h index 85bedeb..ad46ed4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -831,6 +831,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c309f43..f8c4d9f 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -130,6 +130,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + if (ns) { + if (refcount_inc_not_zero(&ns->count)) + return ns; + } + + return NULL; +} + extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, @@ -146,6 +156,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + return ns; +} + static inline void 
put_ipc_ns(struct ipc_namespace *ns) { } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7aa3d86..a6279fd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -229,7 +229,7 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) # define might_sleep_no_state_check() \ - do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) + do { ___might_sleep(__FILE__, __LINE__, 0); } while (0) /** * cant_sleep - annotation for functions that cannot sleep diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index a121fd8..c7764d9 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -155,6 +155,8 @@ struct kretprobe { raw_spinlock_t lock; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { struct hlist_node hlist; struct kretprobe *rp; diff --git a/include/linux/libata.h b/include/linux/libata.h index cb9f84a..8f3856e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -398,7 +398,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9ed67dd..cffc4d1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1241,22 +1241,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. 
- * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1456,13 +1456,13 @@ * @what: kernel feature being accessed */ union security_list_options { - int (*binder_set_context_mgr)(struct task_struct *mgr); - int (*binder_transaction)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file)(struct task_struct *from, - struct task_struct *to, + int (*binder_set_context_mgr)(const struct cred *mgr); + int (*binder_transaction)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_binder)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_file)(const struct cred *from, + const struct cred *to, struct file *file); int (*ptrace_access_check)(struct task_struct *child, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f491690..64b971b 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -351,8 +351,8 @@ phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t 
align, phys_addr_t start, phys_addr_t end); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 36516fe..641a01b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8942,16 +8942,22 @@ struct mlx5_ifc_pcmr_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; + u8 entropy_force_cap[0x1]; u8 entropy_calc_cap[0x1]; u8 entropy_gre_calc_cap[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xf]; + u8 rx_ts_over_crc_cap[0x1]; + u8 reserved_at_33[0xb]; u8 fcs_cap[0x1]; u8 reserved_at_3f[0x1]; + u8 entropy_force[0x1]; u8 entropy_calc[0x1]; u8 entropy_gre_calc[0x1]; - u8 reserved_at_43[0x1b]; + u8 reserved_at_43[0xf]; + u8 rx_ts_over_crc[0x1]; + u8 reserved_at_53[0xb]; u8 fcs_chk[0x1]; u8 reserved_at_5f[0x1]; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bbbd040..edbd64e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3963,7 +3963,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) @@ -3980,26 +3981,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + 
WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + + if (likely(ok)) { + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h index b27da9f..c86fcad 100644 --- a/include/linux/of_clk.h +++ b/include/linux/of_clk.h @@ -6,6 +6,9 @@ #ifndef __LINUX_OF_CLK_H #define __LINUX_OF_CLK_H +struct device_node; +struct of_device_id; + #if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF) unsigned int of_clk_get_parent_count(struct device_node *np); diff --git a/include/linux/pci.h b/include/linux/pci.h index ac9858e..f274454 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -212,6 +212,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), /* Don't use Relaxed Ordering for TLPs directed at this device */ PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), + /* Device does honor MSI masking despite saying otherwise */ + PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 0f9e8f0..1b80f83 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -254,12 +254,12 @@ do { \ preempt_count_dec(); \ } while (0) -#ifdef CONFIG_PREEMPT_RT +#ifndef 
CONFIG_PREEMPT_RT # define preempt_enable_no_resched() sched_preempt_enable_no_resched() -# define preempt_check_resched_rt() preempt_check_resched() +# define preempt_check_resched_rt() barrier(); #else # define preempt_enable_no_resched() preempt_enable() -# define preempt_check_resched_rt() barrier(); +# define preempt_check_resched_rt() preempt_check_resched() #endif #define preemptible() (preempt_count() == 0 && !irqs_disabled()) diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 9fe156d..a68972b 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -177,7 +177,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev /* This shouldn't be possible */ WARN_ON(1); - return ERR_PTR(-ENXIO); + return NULL; } static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4b1c3b6..36f3011 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -157,7 +157,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. + * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). 
* It must not be nested with write_lock_irq(&tasklist_lock), diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 2413427..d101505 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) static inline unsigned long *end_of_stack(const struct task_struct *task) { +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; +#else return task->stack; +#endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) diff --git a/include/linux/security.h b/include/linux/security.h index df90399..3f6b819 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -249,13 +249,13 @@ extern int security_init(void); extern int early_security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -481,25 +481,25 @@ static inline int early_security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int 
security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; @@ -985,6 +985,11 @@ static inline void security_transfer_creds(struct cred *new, { } +static inline void security_cred_getsecid(const struct cred *c, u32 *secid) +{ + *secid = 0; +} + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591..0cda618 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ 
typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index cd15c1b..e08ace7 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -178,7 +178,7 @@ void tee_device_unregister(struct tee_device *teedev); * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h * @id: unique id of a shared memory object on this device * @@ -195,7 +195,7 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; }; diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2bb8ee0..10ff4c3 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,7 +124,6 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? 
*/ -#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -135,7 +134,6 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) -#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index b465f8f..a960de6 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,9 +7,27 @@ #include #include +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: @@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, @@ -120,10 +141,15 @@ retry: if (hdr->gso_type != 
VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); + /* UFO may not include transport header in gso_size. */ + if (gso_type & SKB_GSO_UDP) + nh_off -= thlen; + /* Too small packets are not really GSO ones. */ - if (skb->len - p_off > gso_size) { + if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; diff --git a/include/linux/wait.h b/include/linux/wait.h index f6c36e6..df0f184 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -206,6 +206,7 @@ void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -232,6 +233,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. 
+ */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index b3504fc..2d3c482 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 7fed319..25eae5c 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -68,7 +68,7 @@ struct fib_rules_ops { int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); - bool (*suppress)(struct fib_rule *, + bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index bd0f159..05ecaef 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -451,6 +451,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cb6c125..3bf9ecb 100644 --- 
a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -412,7 +412,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { - return net->ipv4.fib_num_tclassid_users; + return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 3e7d2c0..af9e127 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -47,6 +47,7 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); int (*ip6_del_rt)(struct net *net, struct fib6_info *rt); void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, diff --git a/include/net/llc.h b/include/net/llc.h index df282d9..9c10b12 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4d6291e..e07d276 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -253,6 +253,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_USE 0x10000000 #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 @@ -505,10 +506,15 @@ static inline int neigh_output(struct neighbour *n, 
struct sk_buff *skb, { struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) + /* n->nud_state and hh->hh_len could be changed under us. + * neigh_hh_output() is taking care of the race later. + */ + if (!skip_cache && + (READ_ONCE(n->nud_state) & NUD_CONNECTED) && + READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - else - return n->output(n, skb); + + return n->output(n, skb); } static inline struct neighbour * diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 676349b..6683ecc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -61,7 +61,7 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID - int fib_num_tclassid_users; + atomic_t fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; bool fib_offload_disabled; diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 3397901..004e49f 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -30,6 +30,7 @@ enum nci_flag { NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, + NCI_UNREG, }; /* NCI device states */ diff --git a/include/net/nl802154.h b/include/net/nl802154.h index ddcee12..145acb8 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -19,6 +19,8 @@ * */ +#include + #define NL802154_GENL_NAME "nl802154" enum nl802154_commands { @@ -150,10 +152,9 @@ enum nl802154_attrs { }; enum nl802154_iftype { - /* for backwards compatibility TODO */ - NL802154_IFTYPE_UNSPEC = -1, + NL802154_IFTYPE_UNSPEC = (~(__u32)0), - NL802154_IFTYPE_NODE, + NL802154_IFTYPE_NODE = 0, NL802154_IFTYPE_MONITOR, NL802154_IFTYPE_COORD, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b47b5b9..5b0b3a2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -314,6 +314,8 @@ struct Qdisc_ops { struct netlink_ext_ack *extack); void (*attach)(struct Qdisc *sch); int (*change_tx_queue_len)(struct Qdisc *, 
unsigned int); + void (*change_real_num_tx)(struct Qdisc *sch, + unsigned int new_real_tx); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); @@ -690,6 +692,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3ab5c6b..35c108a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -113,9 +114,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 
fd7c3f7..cb05e50 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1345,6 +1345,7 @@ struct sctp_endpoint { u32 secid; u32 peer_secid; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1360,7 +1361,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/include/net/strparser.h b/include/net/strparser.h index 1d20b98..bec1439 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -54,10 +54,24 @@ struct strp_msg { int offset; }; +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing + * to upper layer. 
+ */ + struct strp_msg strp; + int accum_len; +}; + +struct sk_skb_cb { +#define SK_SKB_CB_PRIV_LEN 20 + unsigned char data[SK_SKB_CB_PRIV_LEN]; + struct _strp_msg strp; +}; + static inline struct strp_msg *strp_msg(struct sk_buff *skb) { return (struct strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Structure for an attached lower socket */ diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 0689d9b..f6a0f09 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -52,7 +52,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); return ¶ms->tcft_enc_metadata->u.tun_info; #else @@ -69,7 +72,7 @@ tcf_tunnel_info_copy(const struct tc_action *a) if (tun) { size_t tun_size = sizeof(*tun) + tun->options_len; struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, - GFP_KERNEL); + GFP_ATOMIC); return tun_copy; } diff --git a/include/net/tls.h b/include/net/tls.h index 697df45..7f220e0 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -360,6 +360,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -465,12 +466,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk->sk_error_report(sk); -} - static inline 
bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -499,7 +494,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION) tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, diff --git a/include/net/udp.h b/include/net/udp.h index fabf507..d9d39cc 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -480,8 +480,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index ab22759..7b3ef7a 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -30,7 +30,7 @@ enum rdma_nl_flags { * constant as well and the compiler checks they are the same. 
*/ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ - static inline void __chk_##_index(void) \ + static inline void __maybe_unused __chk_##_index(void) \ { \ BUILD_BUG_ON(_index != _val); \ } \ diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index ef88b20..23dc8de 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -88,6 +88,8 @@ struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, struct snd_pcm_substream *substream, int type); void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); +void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, + struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stop_streams(struct hdac_bus *bus); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7c9716f..59d7ebb 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -781,8 +781,9 @@ struct se_device { atomic_long_t read_bytes; atomic_long_t write_bytes; /* Active commands on this virtual SE device */ - atomic_t simple_cmds; - atomic_t dev_ordered_sync; + atomic_t non_ordered; + bool ordered_sync_in_progress; + atomic_t delayed_cmd_count; atomic_t dev_qf_count; u32 export_count; spinlock_t delayed_cmd_lock; @@ -804,6 +805,7 @@ struct se_device { struct list_head dev_sep_list; struct list_head dev_tmr_list; struct work_struct qf_work_queue; + struct work_struct delayed_cmd_work; struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1796ff9..a7613ef 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -793,20 +793,20 @@ TRACE_EVENT(f2fs_lookup_start, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(const char *, name) + __string(name, 
dentry->d_name.name) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->ino = dir->i_ino; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", show_dev_ino(__entry), - __entry->name, + __get_str(name), __entry->flags) ); @@ -820,7 +820,7 @@ TRACE_EVENT(f2fs_lookup_end, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(const char *, name) + __string(name, dentry->d_name.name) __field(nid_t, cino) __field(int, err) ), @@ -828,14 +828,14 @@ TRACE_EVENT(f2fs_lookup_end, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->ino = dir->i_ino; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->cino = ino; __entry->err = err; ), TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", show_dev_ino(__entry), - __entry->name, + __get_str(name), __entry->cino, __entry->err) ); diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h index 41b509f..f9c520c 100644 --- a/include/uapi/asm-generic/poll.h +++ b/include/uapi/asm-generic/poll.h @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif -#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ +#define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1fc8faa..22d6bf3 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -851,8 +851,17 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open) __SYSCALL(__NR_clone3, sys_clone3) #endif +#ifdef CONFIG_MCST +#define __NR_el_posix 500 +__SYSCALL(__NR_el_posix, sys_el_posix) +#define __NR_macctl 501 +__SYSCALL(__NR_macctl, sys_macctl) +#undef __NR_syscalls +#define __NR_syscalls 502 +#else #undef __NR_syscalls #define __NR_syscalls 436 +#endif /* CONFIG_MCST */ /* * 32 bit systems 
traditionally used different diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h index f565e82..77a27a8 100644 --- a/include/uapi/linux/major.h +++ b/include/uapi/linux/major.h @@ -68,6 +68,9 @@ #define APBLOCK_MAJOR 38 /* AP1000 Block device */ #define DDV_MAJOR 39 /* AP1000 DDV block device */ #define NBD_MAJOR 43 /* Network block device */ +#ifdef CONFIG_MCST +#define MCST_AUX_TTY_MAJOR 44 +#endif #define RISCOM8_NORMAL_MAJOR 48 #define DAC960_MAJOR 48 /* 48..55 */ #define RISCOM8_CALLOUT_MAJOR 49 diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c..4fa4e97 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,14 +271,14 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 29d6e93..b485d8b 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -502,6 +502,12 @@ #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. 
*/ #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 3f40501..b39cdbc 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -1,21 +1,53 @@ -/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * ring.h * * Shared producer-consumer ring macros. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Tim Deegan and Andrew Warfield November 2004. */ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ +/* + * When #include'ing this header, you need to provide the following + * declaration upfront: + * - standard integers types (uint8_t, uint16_t, etc) + * They are provided by stdint.h of the standard headers. + * + * In addition, if you intend to use the FLEX macros, you also need to + * provide the following, before invoking the FLEX macros: + * - size_t + * - memcpy + * - grant_ref_t + * These declarations are provided by string.h of the standard headers, + * and grant_table.h from the Xen public headers. + */ + #include typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ -#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) #define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) #define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) @@ -27,82 +59,79 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __CONST_RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ - sizeof(((struct _s##_sring *)0)->ring[0]))) - +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. 
*/ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, - * let's say struct request, and struct response already defined. + * let's say request_t, and response_t already defined. * * In a header where you want the ring datatype declared, you then do: * - * DEFINE_RING_TYPES(mytag, struct request, struct response); + * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * - * struct mytag_sring - The shared ring. - * struct mytag_front_ring - The 'front' half of the ring. - * struct mytag_back_ring - The 'back' half of the ring. + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). 
To initialise * the front half: * - * struct mytag_front_ring front_ring; - * SHARED_RING_INIT((struct mytag_sring *)shared_page); - * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * - * struct mytag_back_ring back_ring; - * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; - +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t __pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + 
struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ /* * Macros for manipulating rings. * @@ -119,105 +148,99 @@ struct __name##_back_ring { \ */ /* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - memset((_s)->pad, 0, sizeof((_s)->pad)); \ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->req_prod_pvt = (_i); \ + (_r)->rsp_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size) + +#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->rsp_prod_pvt = (_i); \ + (_r)->req_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -/* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) - -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) 
+#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size) /* How big is this ring? */ -#define RING_SIZE(_r) \ +#define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ +#define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ -#define RING_FULL(_r) \ +#define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ - }) +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) /* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ +#define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + /* - * Get a local copy of a request. + * Get a local copy of a request/response. * - * Use this in preference to RING_GET_REQUEST() so all processing is + * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is * done on a local copy that cannot be modified by the other end. * * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this - * to be ineffective where _req is a struct which consists of only bitfields. 
+ * to be ineffective where dest is a struct which consists of only bitfields. */ -#define RING_COPY_REQUEST(_r, _idx, _req) do { \ - /* Use volatile to force the copy into _req. */ \ - *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +#define RING_COPY_(type, r, idx, dest) do { \ + /* Use volatile to force the copy into dest. */ \ + *(dest) = *(volatile typeof(dest))RING_GET_##type(r, idx); \ } while (0) -#define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) +#define RING_COPY_REQUEST(r, idx, req) RING_COPY_(REQUEST, r, idx, req) +#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp) /* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) /* Ill-behaved frontend determination: Can there be this many requests? */ -#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) +/* Ill-behaved backend determination: Can there be this many responses? 
*/ +#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r)) -#define RING_PUSH_REQUESTS(_r) do { \ - virt_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +#define RING_PUSH_REQUESTS(_r) do { \ + virt_wmb(); /* back sees requests /before/ updated producer index */\ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) -#define RING_PUSH_RESPONSES(_r) do { \ - virt_wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +#define RING_PUSH_RESPONSES(_r) do { \ + virt_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* @@ -250,40 +273,40 @@ struct __name##_back_ring { \ * field appropriately. */ -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - virt_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - virt_mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + virt_wmb(); /* back sees requests /before/ updated producer index */\ + (_r)->sring->req_prod = __new; \ + virt_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - virt_wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - virt_mb(); /* front sees new 
responses /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + virt_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + virt_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - virt_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - virt_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0)