diff --git a/Makefile b/Makefile index bd9ad8a..08e685c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 4 SUBLEVEL = 91 -EXTRAVERSION = -2.13 +EXTRAVERSION = -3.6 NAME = Kleptomaniac Octopus # *DOCUMENTATION* @@ -506,9 +506,6 @@ KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \ -Wno-format-security \ -std=gnu89 endif -ifeq ($(call cc-lcc-yn),y) -KBUILD_CFLAGS += -fno-ident -endif KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := diff --git a/arch/e2k/Makefile b/arch/e2k/Makefile index 50d58cb..34cb7c0 100644 --- a/arch/e2k/Makefile +++ b/arch/e2k/Makefile @@ -17,7 +17,8 @@ LD = $(shell $(CC) -print-prog-name=ld) OBJCOPY = $(shell $(CC) -print-prog-name=objcopy) KBUILD_CFLAGS += -fkernel -gline -masm-inline $(call cc-option,-fforbid-fp) \ - $(call cc-option,-fmax-errors=5) $(call cc-option,-fno-loop-apb) + $(call cc-option,-fmax-errors=5) $(call cc-option,-fno-loop-apb) \ + -fno-ident ifeq ($(PROFILE_GENERATE), 1) KBUILD_CFLAGS += -fprofile-generate-kernel @@ -31,10 +32,10 @@ KBUILD_CFLAGS += $(call cc-option,-finline-functions,) \ $(call cc-option,-finline-functions-called-once,) # Some uninteresting or broken warnings can be disabled with #pragma's only -KBUILD_CFLAGS += -Wno-array-bounds -Wno-duplicate-type-qualifier \ - -Wno-builtin-functions-redefined -Wno-reduced-alignment \ - -Wno-unused-value -Wno-overflow -Wno-signed-one-bit-field \ - -include $(srctree)/arch/e2k/include/asm/override-lcc-warnings.h +KBUILD_CFLAGS += -Wno-array-bounds -Wno-builtin-functions-redefined \ + -Wno-reduced-alignment -Wno-overflow -Wno-signed-one-bit-field \ + -Wno-alignment-reduction-ignored \ + -include $(srctree)/arch/e2k/include/asm/override-lcc-warnings.h LDFLAGS_vmlinux := CHECKFLAGS += -D__e2k__ diff --git a/arch/e2k/include/asm-l/epic.h b/arch/e2k/include/asm-l/epic.h index 973b099..a1ba022 100644 --- a/arch/e2k/include/asm-l/epic.h +++ b/arch/e2k/include/asm-l/epic.h @@ -67,6 +67,11 @@ static inline bool read_epic_bsp(void) return reg.bits.bsp_core; } +static inline u32 epic_vector_prio(u32 vector) +{ + return 1 + ((vector >> 8) & 0x3); +} + extern void __init_recv setup_prepic(void); extern void ack_epic_irq(void); extern void epic_send_IPI(unsigned int dest_id, int vector); diff --git a/arch/e2k/include/asm-l/epic_regs.h b/arch/e2k/include/asm-l/epic_regs.h index 9f96ed2..f1c1ede 100644 --- a/arch/e2k/include/asm-l/epic_regs.h +++ b/arch/e2k/include/asm-l/epic_regs.h @@ -79,9 +79,9 @@ union cepic_epic_int2 { dlvm : 3, __reserved2 : 4, gst_id : 12, - __reserved3 : 12, + __reserved3 : 8, gst_dst : 10, - __reserved4 : 10; + __reserved4 : 14; } __packed bits; }; @@ -323,12 +323,12 @@ typedef struct kvm_epic_page { u32 id; u32 cpr; u32 esr; - u32 esr2; - u32 cir; + union cepic_esr2 esr2; + union cepic_cir cir; atomic_t esr_new; u32 svr; - u64 icr; - u32 timer_lvtt; + union cepic_icr icr; + union cepic_timer_lvtt timer_lvtt; u32 timer_init; u32 timer_cur; u32 timer_div; @@ -338,13 +338,13 @@ typedef struct kvm_epic_page { u32 nm_timer_div; u32 pnmirr_mask; /*04c*/ u32 __reserved1[45]; -/*100*/ atomic64_t pmirr[16]; +/*100*/ atomic64_t pmirr[CEPIC_PMIRR_NR_DREGS]; /*180*/ u32 __reserved2[24]; /*1e0*/ atomic_t pnmirr; u32 __reserved3[263]; /*600*/ u8 pnmirr_byte[16]; /*610*/ u32 __reserved4[124]; -/*800*/ u8 pmirr_byte[1024]; +/*800*/ u8 pmirr_byte[CEPIC_PMIRR_NR_BITS]; } epic_page_t; #elif defined(__BIG_ENDIAN) @@ -417,9 +417,9 @@ union cepic_epic_int { union cepic_epic_int2 { u64 raw; struct { - u64 __reserved4 : 10, + 
u64 __reserved4 : 14, gst_dst : 10, - __reserved3 : 12, + __reserved3 : 8, gst_id : 12, __reserved2 : 4, dlvm : 3, diff --git a/arch/e2k/include/asm-l/l_pmc.h b/arch/e2k/include/asm-l/l_pmc.h index 1a80d13..f9006a9 100644 --- a/arch/e2k/include/asm-l/l_pmc.h +++ b/arch/e2k/include/asm-l/l_pmc.h @@ -124,7 +124,7 @@ struct l_pmc { extern struct l_pmc l_pmc[MAX_NUM_PMCS]; -#if defined(CONFIG_L_PMC) || defined(CONFIG_S2_PMC) +#if defined(CONFIG_L_PMC_MODULE) || defined(CONFIG_L_PMC) || defined(CONFIG_S2_PMC) extern int spmc_get_temp_cur0(void); int pmc_l_gpufreq_set_scale(unsigned char scale); int pmc_l_gpufreq_get_scale(void); diff --git a/arch/e2k/include/asm-l/l_timer.h b/arch/e2k/include/asm-l/l_timer.h index f362e05..6afbc52 100644 --- a/arch/e2k/include/asm-l/l_timer.h +++ b/arch/e2k/include/asm-l/l_timer.h @@ -46,6 +46,7 @@ typedef struct lt_regs { extern unsigned long long lt_phys_base; extern lt_regs_t *lt_regs; +extern long lt_clock_rate; extern void setup_lt_timer(void); extern int __init init_lt_clocksource(void); diff --git a/arch/e2k/include/asm-l/mpspec.h b/arch/e2k/include/asm-l/mpspec.h index f1c795b..3d86c40 100644 --- a/arch/e2k/include/asm-l/mpspec.h +++ b/arch/e2k/include/asm-l/mpspec.h @@ -378,6 +378,7 @@ extern int mp_find_iolink_io_apicid(int node, int link); extern int mp_fix_io_apicid(unsigned int src_apicid, unsigned int new_apicid); void mp_pci_add_resources(struct list_head *resources, struct iohub_sysdata *sd); +extern int mp_iohubs_num; #else static inline int mp_fix_io_apicid(unsigned int src_apicid, unsigned int new_apicid) diff --git a/arch/e2k/include/asm/aau_regs_access.h b/arch/e2k/include/asm/aau_regs_access.h index e2b3b5f..78e2355 100644 --- a/arch/e2k/include/asm/aau_regs_access.h +++ b/arch/e2k/include/asm/aau_regs_access.h @@ -537,14 +537,14 @@ do { \ \ /* prefetch data to restore */ \ if (AS(aasr).stb) \ - prefetchw_range(aau->aastis, sizeof(aau->aastis) + \ + prefetch_nospec_range(aau->aastis, sizeof(aau->aastis) + \ sizeof(aau->aasti_tags)); \ if (AS(aasr).iab) \ - prefetchw_range(aau->aainds, sizeof(aau->aainds) + \ + prefetch_nospec_range(aau->aainds, sizeof(aau->aainds) + \ sizeof(aau->aaind_tags) + sizeof(aau->aaincrs) + \ sizeof(aau->aaincr_tags) + sizeof(aau->aads)); \ if (AAU_STOPPED(aasr)) \ - prefetchw_range(aau->aaldi, sizeof(aau->aaldi)); \ + prefetch_nospec_range(aau->aaldi, sizeof(aau->aaldi)); \ \ /* Make sure prefetches are issued */ \ barrier(); \ diff --git a/arch/e2k/include/asm/alternative.h b/arch/e2k/include/asm/alternative.h index 12d0505..e3b33dc 100644 --- a/arch/e2k/include/asm/alternative.h +++ b/arch/e2k/include/asm/alternative.h @@ -177,14 +177,19 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * oldinstr is padded with jump and nops at compile time if altinstr is * longer. altinstr is padded with jump and nops at run-time during patching. */ -#define alternative(oldinstr, altinstr, facility, clobbers...) \ - asm volatile (ALTERNATIVE(oldinstr, altinstr, facility) \ - ::: clobbers) +#define alternative(oldinstr, altinstr, _facility, clobbers...) 
\ + _Pragma("no_asm_inline") \ + asm volatile (ALTERNATIVE(oldinstr, altinstr, %[facility]) \ + :: [facility] "i" (_facility) : clobbers) -#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm volatile (ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ - altinstr2, facility2) \ - ::: clobbers) +#define alternative_2(oldinstr, altinstr1, _facility1, altinstr2, _facility2) \ + _Pragma("no_asm_inline") \ + asm volatile (ALTERNATIVE_2(oldinstr, altinstr1, %[facility1], \ + altinstr2, %[facility2]) \ + : \ + : [facility1] "i" (_facility1), \ + [facility2] "i" (_facility2) \ + : clobbers) /* * How to use: diff --git a/arch/e2k/include/asm/barrier.h b/arch/e2k/include/asm/barrier.h index 0c3617f..8dee3f3 100644 --- a/arch/e2k/include/asm/barrier.h +++ b/arch/e2k/include/asm/barrier.h @@ -4,17 +4,18 @@ #include #include +#include #include #if CONFIG_CPU_ISET >= 6 /* Cannot use this on V5 because of load-after-store dependencies - * compiled kernel won't honour them */ -# define mb() E2K_WAIT_V6(_st_c | _ld_c | _sas | _sal | _las | _lal) +# define mb() E2K_WAIT(_st_c | _ld_c | _sas | _sal | _las | _lal) #else # define mb() E2K_WAIT(_st_c | _ld_c) #endif -#define wmb() E2K_WAIT_ST_C_SAS() -#define rmb() E2K_WAIT_LD_C_LAL() +#define wmb() E2K_WAIT(_st_c | _sas) +#define rmb() E2K_WAIT(_ld_c | _lal) /* * For smp_* variants add _mt modifier @@ -22,12 +23,12 @@ #if CONFIG_CPU_ISET >= 6 /* Cannot use this on V5 because of load-after-store dependencies - * compiled kernel won't honour them */ -# define __smp_mb() E2K_WAIT_V6(_st_c | _ld_c | _sas | _sal | _las | _lal | _mt) +# define __smp_mb() E2K_WAIT(_st_c | _ld_c | _sas | _sal | _las | _lal | _mt) #else # define __smp_mb() E2K_WAIT(_st_c | _ld_c) #endif -#define __smp_wmb() E2K_WAIT_ST_C_SAS_MT() -#define __smp_rmb() E2K_WAIT_LD_C_LAL_MT() +#define __smp_wmb() E2K_WAIT(_st_c | _sas | _mt) +#define __smp_rmb() E2K_WAIT(_ld_c | _lal | _mt) #define dma_rmb() __smp_rmb() #define dma_wmb() __smp_wmb() @@ -37,7 +38,7 @@ #if CONFIG_CPU_ISET >= 5 # define __smp_mb__after_atomic() barrier() -# define __smp_mb__before_atomic() E2K_WAIT_ST_C_SAS_LD_C_SAL_MT() +# define __smp_mb__before_atomic() E2K_WAIT(_st_c | _las | _ld_c | _lal | _mt) #elif CONFIG_CPU_ISET >= 3 /* Atomic operations are serializing since e2s */ # define __smp_mb__after_atomic() \ @@ -70,7 +71,7 @@ do { \ compiletime_assert(sizeof(*p) == 1 || sizeof(*p) == 2 || \ sizeof(*p) == 4 || sizeof(*p) == 8, \ "Need native word sized stores/loads for atomicity."); \ - E2K_WAIT_ST_C_SAS_LD_C_SAL_MT(); \ + E2K_WAIT(_st_c | _sas | _ld_c | _sal | _mt); \ WRITE_ONCE(*(p), (v)); \ } while (0) #endif /* CONFIG_CPU_ISET >= 6 */ diff --git a/arch/e2k/include/asm/checksum.h b/arch/e2k/include/asm/checksum.h index 9bddf84..2a9db95 100644 --- a/arch/e2k/include/asm/checksum.h +++ b/arch/e2k/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_partial(const void *buff, int len, __wsum sum) !cpu_has(CPU_HWBUG_UNALIGNED_LOADS)) { sum = csum_add(sum, ip_fast_csum_nofold_maybe_unaligned(buff, len >> 2)); } else { - E2K_PREFETCH_L1((__force void *) buff); + prefetch((__force void *) buff); sum = __csum_partial(buff, len, sum); } return sum; diff --git a/arch/e2k/include/asm/console.h b/arch/e2k/include/asm/console.h index e4740ae..42fa589 100644 --- a/arch/e2k/include/asm/console.h +++ b/arch/e2k/include/asm/console.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/e2k/include/asm/copy-hw-stacks.h 
b/arch/e2k/include/asm/copy-hw-stacks.h index 7a20489..ee45fca 100644 --- a/arch/e2k/include/asm/copy-hw-stacks.h +++ b/arch/e2k/include/asm/copy-hw-stacks.h @@ -214,7 +214,7 @@ native_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size) size = k_psp_hi.PSP_hi_ind - spilled_size; BUG_ON(!IS_ALIGNED(size, ALIGN_PSTACK_TOP_SIZE) || (s64) size < 0); - prefetchw_range(src, size); + prefetch_nospec_range(src, size); native_kernel_hw_stack_frames_copy(dst, src, size); k_psp_hi.PSP_hi_ind -= spilled_size; @@ -327,7 +327,7 @@ static inline int __copy_user_to_current_hw_stack(void *dst, void __user *src, E2K_FLUSHR; SET_USR_PFAULT("$.recovery_memcpy_fault"); - fast_tagged_memory_copy_from_user(dst, src, size, regs, + fast_tagged_memory_copy_from_user(dst, src, size, NULL, regs, TAGGED_MEM_STORE_REC_OPC | MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT, TAGGED_MEM_LOAD_REC_OPC | @@ -395,8 +395,8 @@ static inline int copy_e2k_stack_to_user(void __user *dst, void *src, } static __always_inline int -user_hw_stack_frames_copy(void __user *dst, void *src, unsigned long copy_size, - const pt_regs_t *regs, unsigned long hw_stack_ind, bool is_pcsp) +user_hw_stack_frames_copy(void __user *dst, void *src, long copy_size, + const pt_regs_t *regs, long hw_stack_ind, bool is_pcsp) { unsigned long ts_flag; @@ -414,7 +414,7 @@ user_hw_stack_frames_copy(void __user *dst, void *src, unsigned long copy_size, SET_USR_PFAULT("$.recovery_memcpy_fault"); ts_flag = set_ts_flag(TS_KERNEL_SYSCALL); - fast_tagged_memory_copy_to_user(dst, src, copy_size, regs, + fast_tagged_memory_copy_to_user(dst, src, copy_size, NULL, regs, TAGGED_MEM_STORE_REC_OPC | MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT, TAGGED_MEM_LOAD_REC_OPC | diff --git a/arch/e2k/include/asm/cpu_regs.h b/arch/e2k/include/asm/cpu_regs.h index 52640e4..20c86e1 100644 --- a/arch/e2k/include/asm/cpu_regs.h +++ b/arch/e2k/include/asm/cpu_regs.h @@ -6,11 +6,54 @@ #include #include -#include #ifndef __ASSEMBLY__ #include -#include + +/* + * Read Core Mode Register (CORE_MODE) to the structure + * Register fields access: fff = AS_STRACT(CORE_MODE).xxx + * Register entire access: reg_entire = AS_WORD(CORE_MODE) + */ +#define NATIVE_READ_CORE_MODE_REG() \ +({ \ + e2k_core_mode_t CORE_MODE; \ + CORE_MODE.CORE_MODE_reg = NATIVE_READ_CORE_MODE_REG_VALUE(); \ + CORE_MODE; \ +}) +#define READ_CORE_MODE_REG() \ +({ \ + e2k_core_mode_t CORE_MODE; \ + CORE_MODE.CORE_MODE_reg = READ_CORE_MODE_REG_VALUE(); \ + CORE_MODE; \ +}) +#define BOOT_READ_CORE_MODE_REG() \ +({ \ + e2k_core_mode_t CORE_MODE; \ + CORE_MODE.CORE_MODE_reg = BOOT_READ_CORE_MODE_REG_VALUE(); \ + CORE_MODE; \ +}) + +/* Fix header dependency hell. cpu_regs_access.h eventually includes + * macros for paravirtualized guest which in turn rely on IS_HV_GM(), + * and IS_HV_GM() relies in READ_CORE_MODE_REG() defined in this file. 
*/ +#include + +/* + * Write Core Mode Register (CORE_MODE) from the structure + * Register fields filling: AS_STRACT(CORE_MODE).xxx = fff + * Register entire filling: AS_WORD(CORE_MODE) = CORE_MODE_value + */ +#define NATIVE_WRITE_CORE_MODE_REG(CORE_MODE) \ + NATIVE_WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) +#define BOOT_NATIVE_WRITE_CORE_MODE_REG(CORE_MODE) \ + BOOT_NATIVE_WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) +#define WRITE_CORE_MODE_REG(CORE_MODE) \ + WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) +#define BOOT_WRITE_CORE_MODE_REG(CORE_MODE) \ + BOOT_WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) + + #define NATIVE_STRIP_PCSHTP_WINDOW() NATIVE_WRITE_PCSHTP_REG_SVALUE(0) #define STRIP_PCSHTP_WINDOW() WRITE_PCSHTP_REG_SVALUE(0) @@ -2965,11 +3008,11 @@ native_boot_init_BGR_reg(void) /* * Read/Write Control Unit HardWare registers (CU_HW0/CU_HW1) */ -#define READ_CU_HW0_REG() READ_CU_HW0_REG_VALUE() -#define READ_CU_HW1_REG() READ_CU_HW1_REG_VALUE() +#define READ_CU_HW0_REG() ((e2k_cu_hw0_t) { .word = READ_CU_HW0_REG_VALUE() }) +#define READ_CU_HW1_REG() READ_CU_HW1_REG_VALUE() -#define WRITE_CU_HW0_REG(reg) WRITE_CU_HW0_REG_VALUE(reg) -#define WRITE_CU_HW1_REG(reg) WRITE_CU_HW1_REG_VALUE(reg) +#define WRITE_CU_HW0_REG(reg) WRITE_CU_HW0_REG_VALUE(reg.word) +#define WRITE_CU_HW1_REG(reg) WRITE_CU_HW1_REG_VALUE(reg) /* * Read low/high double-word Recovery point register (RPR) @@ -3194,11 +3237,7 @@ write_DIBSR_reg(e2k_dibsr_t DIBSR) #define NATIVE_WRITE_DIMCR_REG(DIMCR) \ NATIVE_WRITE_DIMCR_REG_VALUE(DIMCR.DIMCR_reg) #define WRITE_DIMCR_REG(DIMCR) WRITE_DIMCR_REG_VALUE(DIMCR.DIMCR_reg) -static inline void -write_DIMCR_reg(e2k_dimcr_t DIMCR) -{ - WRITE_DIMCR_REG(DIMCR); -} + #define NATIVE_WRITE_DIBAR0_REG(DIBAR0) NATIVE_WRITE_DIBAR0_REG_VALUE(DIBAR0) #define NATIVE_WRITE_DIBAR1_REG(DIBAR1) NATIVE_WRITE_DIBAR1_REG_VALUE(DIBAR1) #define NATIVE_WRITE_DIBAR2_REG(DIBAR2) NATIVE_WRITE_DIBAR2_REG_VALUE(DIBAR2) @@ -3314,44 +3353,6 @@ read_CUIR_reg(void) return READ_CUIR_REG(); } -/* - * Read Core Mode Register (CORE_MODE) to the structure - * Register fields access: fff = AS_STRACT(CORE_MODE).xxx - * Register entire access: reg_entire = AS_WORD(CORE_MODE) - */ -#define NATIVE_READ_CORE_MODE_REG() \ -({ \ - e2k_core_mode_t CORE_MODE; \ - CORE_MODE.CORE_MODE_reg = NATIVE_READ_CORE_MODE_REG_VALUE(); \ - CORE_MODE; \ -}) -#define READ_CORE_MODE_REG() \ -({ \ - e2k_core_mode_t CORE_MODE; \ - CORE_MODE.CORE_MODE_reg = READ_CORE_MODE_REG_VALUE(); \ - CORE_MODE; \ -}) -#define BOOT_READ_CORE_MODE_REG() \ -({ \ - e2k_core_mode_t CORE_MODE; \ - CORE_MODE.CORE_MODE_reg = BOOT_READ_CORE_MODE_REG_VALUE(); \ - CORE_MODE; \ -}) - -/* - * Write Core Mode Register (CORE_MODE) from the structure - * Register fields filling: AS_STRACT(CORE_MODE).xxx = fff - * Register entire filling: AS_WORD(CORE_MODE) = CORE_MODE_value - */ -#define NATIVE_WRITE_CORE_MODE_REG(CORE_MODE) \ - NATIVE_WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) -#define BOOT_NATIVE_WRITE_CORE_MODE_REG(CORE_MODE) \ - BOOT_NATIVE_WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) -#define WRITE_CORE_MODE_REG(CORE_MODE) \ - WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) -#define BOOT_WRITE_CORE_MODE_REG(CORE_MODE) \ - BOOT_WRITE_CORE_MODE_REG_VALUE(CORE_MODE.CORE_MODE_reg) - /* * Read word Processor State Register (PSR) to the structure * Register fields access: fff = AS_STRACT(PSR).xxx diff --git a/arch/e2k/include/asm/cpu_regs_types.h b/arch/e2k/include/asm/cpu_regs_types.h index 13bde84..cf797e2 100644 --- 
a/arch/e2k/include/asm/cpu_regs_types.h +++ b/arch/e2k/include/asm/cpu_regs_types.h @@ -991,29 +991,26 @@ typedef union instr_hs { u32 __pad : 14; u32 c0 : 1; /* CS0 */ u32 c1 : 1; /* CS1 */ - u32 __pad2 : 16; + u32 __pad2 : 4; + u32 ale0 : 1; + u32 ale1 : 1; + u32 ale2 : 1; + u32 ale3 : 1; + u32 ale4 : 1; + u32 ale5 : 1; + u32 al0 : 1; + u32 al1 : 1; + u32 al2 : 1; + u32 al3 : 1; + u32 al4 : 1; + u32 al5 : 1; }; - struct { - u32 mdl : 4; - u32 lng : 3; - u32 nop : 3; - u32 lm : 1; - u32 x : 1; - u32 s : 1; - u32 sw : 1; - u32 c : 2; - u32 cd : 2; - u32 pl : 2; - u32 ale : 6; - u32 al : 6; - } fields; - instr_syl_t word; /* as entire syllable */ + instr_syl_t word; /* as entire syllable */ } instr_hs_t; #define E2K_INSTR_HS_LNG_MASK 0x70 -#define E2K_GET_INSTR_SIZE(hs) \ - ((AS_STRUCT(hs).lng + 1) * sizeof(instr_item_t)) +#define E2K_GET_INSTR_SIZE(hs) ((hs.lng + 1) * sizeof(instr_item_t)) /* * Stubs sullable structure @@ -1037,23 +1034,6 @@ typedef union instr_ss { u32 eap : 1; /* [29] end array prefetch */ u32 ipd : 2; /* [31:30] instruction prefetch depth */ }; - struct { - u32 ctcond : 9; - u32 x : 1; - u32 ctop : 2; - u32 aa : 4; - u32 alc : 2; - u32 abp : 2; - u32 xx : 1; - u32 abn : 2; - u32 abg : 2; - u32 xxx : 1; - u32 vfdi : 1; - u32 srp : 1; - u32 bap : 1; - u32 eap : 1; - u32 ipd : 2; - } fields; instr_syl_t word; /* as entire syllable */ } instr_ss_t; @@ -1061,39 +1041,45 @@ typedef union instr_ss { * ALU syllables structure */ -typedef struct instr_alsf2_fields { - u32 dst : 8; /* [ 7: 0] destination */ - u32 src2 : 8; /* [15: 8] source register #2 */ - u32 opce : 8; /* [23:16] opcode extension */ - u32 cop : 7; /* [30:24] code of operation */ - u32 spec : 1; /* [31] speculative mode */ -} instr_alsf2_fields_t; - -typedef union instr_alsf2 { - instr_alsf2_fields_t fields; /* as fields */ - instr_syl_t word; /* as entire syllable */ -} instr_alsf2_t; - -typedef union instr_als { - instr_alsf2_fields_t f2; /* as fields */ - instr_syl_t word; /* as entire syllable */ +typedef union { + union { + struct { + u32 dst : 8; /* [ 7: 0] destination */ + u32 src2 : 8; /* [15: 8] source register #2 */ + u32 opce : 8; /* [23:16] opcode extension */ + u32 cop : 7; /* [30:24] code of operation */ + u32 spec : 1; /* [31] speculative mode */ + }; + struct { + u32 : 24; + u32 opc : 8; + }; + } alf2; + instr_syl_t word; /* as entire syllable */ } instr_als_t; -typedef struct instr_alesf2_fields { - u32 opce : 8; /* [ 7: 0] opcode 2 extension */ - u32 opc2 : 8; /* [15: 8] opcode 2 */ -} instr_alesf2_fields_t; - -typedef union instr_alesf2 { - instr_alesf2_fields_t fields; /* as fields */ - instr_semisyl_t word; /* as entire syllable */ -} instr_alesf2_t; - typedef union instr_ales { - instr_alesf2_fields_t f2; /* as fields */ - instr_semisyl_t word; /* as entire syllable */ + struct { + u16 src3 : 8; + u16 opc2 : 8; + } alef1; + struct { + u16 opce : 8; + u16 opc2 : 8; + } alef2; + instr_semisyl_t word; /* as entire syllable */ } instr_ales_t; +typedef union { + struct { + u8 __pad : 5; + u8 rt5 : 1; + u8 rt6 : 1; + u8 rt7 : 1; + }; + u8 word; +} instr_src_t; + #define INSTR_SRC2_GREG_VALUE 0xe0 #define INSTR_SRC2_GREG_MASK 0xe0 #define INSTR_SRC2_GREG_NUM_MASK 0x1f @@ -1176,6 +1162,7 @@ typedef union { } instr_cs1_t; #define CS1_OPC_SETEI 2 +#define CS1_OPC_WAIT 3 #define CS1_OPC_CALL 5 @@ -1226,6 +1213,15 @@ struct e2k_wd_fields { /* Current window descriptor (WD) */ typedef union e2k_wd { + struct { + u64 : 3; + u64 base_d : E2K_WD_SIZE - 3; + u64 : 16 - E2K_WD_SIZE + 3; + u64 size_d : 
E2K_WD_SIZE - 3; + u64 : 16 - E2K_WD_SIZE + 3; + u64 psize_d : E2K_WD_SIZE - 3; + u64 : 32 - E2K_WD_SIZE; + }; struct { u64 base : E2K_WD_SIZE; /* [10: 0] window base: */ /* %r0 physical address */ @@ -1388,6 +1384,13 @@ typedef struct e2k_br_fields { /* Structure of br reg */ u32 pcur : 5; /* [27:23] */ } e2k_br_fields_t; typedef union e2k_br { + struct { + u32 rbs : 6; + u32 rsz : 6; + u32 rcur : 6; + u32 psz : 5; + u32 pcur : 5; + }; e2k_br_fields_t fields; /* as fields */ u32 word; /* as entire register */ } e2k_br_t; @@ -1398,6 +1401,11 @@ typedef union e2k_br { #define BR_pcur fields.pcur #define BR_reg word +static inline int br_rsz_full_d(e2k_br_t br) +{ + return 2 * (br.rsz + 1); +} + /* see 5.25.1. */ typedef union e2k_rpr_lo_struct { @@ -1456,11 +1464,15 @@ typedef union e2k_bgr { /* CR0 */ -typedef struct e2k_cr0_hi_fields { /* Structure of cr0_hi chain reg */ +typedef struct { /* Structure of cr0_hi chain reg */ u64 unused : 3; /* [ 2: 0] */ u64 ip : 61; /* [63: 3] */ } e2k_cr0_hi_fields_t; -typedef union e2k_cr0_hi { +typedef union { + struct { + u64 : 3; + u64 ip : 61; + }; e2k_cr0_hi_fields_t fields; /* as fields */ u64 word; /* as entire register */ } e2k_cr0_hi_t; @@ -1468,10 +1480,10 @@ typedef union e2k_cr0_hi { #define CR0_hi_half word /* [63: 0] - entire high */ #define CR0_hi_IP CR0_hi_half /* [63: 0] - IP */ -typedef struct e2k_cr0_lo_fields { /* Structure of cr0_lo chain reg */ +typedef struct { /* Structure of cr0_lo chain reg */ u64 pf : 64; /* [63: 0] */ } e2k_cr0_lo_fields_t; -typedef union e2k_cr0_lo { +typedef union { e2k_cr0_lo_fields_t fields; /* as fields */ u64 word; /* as entire register */ } e2k_cr0_lo_t; @@ -1480,7 +1492,7 @@ typedef union e2k_cr0_lo { /* CR1 */ -typedef union e2k_cr1_hi_fields { /* Structure of cr1_hi chain reg */ +typedef union { /* Structure of cr1_hi chain reg */ struct { u64 br : 28; /* [27: 0] */ u64 unused : 7; /* [34:28] */ @@ -1496,7 +1508,21 @@ typedef union e2k_cr1_hi_fields { /* Structure of cr1_hi chain reg */ u64 __x1 : 36; /* [63:28] */ }; } e2k_cr1_hi_fields_t; -typedef union e2k_cr1_hi { +typedef union { + struct { + u64 br : 28; + u64 : 7; + u64 wdbl : 1; + u64 ussz : 28; + }; + struct { + u64 rbs : 6; + u64 rsz : 6; + u64 rcur : 6; + u64 psz : 5; + u64 pcur : 5; + u64 : 36; + }; e2k_cr1_hi_fields_t fields; /* as fields */ u64 word; /* as entire register */ } e2k_cr1_hi_t; @@ -1510,7 +1536,7 @@ typedef union e2k_cr1_hi { #define CR1_hi_pcur fields.pcur /* [27:23] - current of rotate preds */ #define CR1_hi_half word /* [63: 0] - entire high */ -typedef union e2k_cr1_lo_fields { /* Structure of cr1_lo chain reg */ +typedef union { /* Structure of cr1_lo chain reg */ struct { u64 unused1 : 16; /* [15:0] */ u64 ein : 8; /* [23:16] */ @@ -1535,7 +1561,29 @@ typedef union e2k_cr1_lo_fields { /* Structure of cr1_lo chain reg */ /* enable */ }; } e2k_cr1_lo_fields_t; -typedef union e2k_cr1_lo { +typedef union { + struct { + u64 : 16; + u64 ein : 8; + u64 ss : 1; + u64 wfx : 1; + u64 wpsz : 7; + u64 wbs : 7; + u64 cuir : 17; + u64 psr : 7; + }; + struct { + u64 : 40; + u64 cui : 16; + u64 ic : 1; /* iset <= v5 */ + u64 pm : 1; + u64 ie : 1; + u64 sge : 1; + u64 lw : 1; + u64 uie : 1; + u64 nmie : 1; + u64 unmie : 1; + }; e2k_cr1_lo_fields_t fields; /* as fields */ u64 word; /* as entire register */ } e2k_cr1_lo_t; @@ -1655,7 +1703,7 @@ typedef union { /* PSR */ -typedef struct e2k_psr_fields { /* Structure of psr reg */ +typedef struct { u32 pm : 1; /* [ 0] */ u32 ie : 1; /* [ 1] */ u32 sge : 1; /* [ 2] */ @@ -1666,7 +1714,17 
@@ typedef struct e2k_psr_fields { /* Structure of psr reg */ /* enable */ u32 unused : 25; /* [31: 7] */ } e2k_psr_fields_t; -typedef union e2k_psr { +typedef union { + struct { + u32 pm : 1; + u32 ie : 1; + u32 sge : 1; + u32 lw : 1; + u32 uie : 1; + u32 nmie : 1; + u32 unmie : 1; + u32 : 25; + }; e2k_psr_fields_t fields; /* as fields */ u32 word; /* as entire register */ } e2k_psr_t; @@ -1901,6 +1959,35 @@ typedef struct e2k_mem_crstack { #define SZ_OF_CR sizeof(e2k_mem_crs_t) +typedef union { + struct { + u64 trwm_itag : 3; + u64 trwm_idata : 3; + u64 trwm_cf : 3; + u64 ib_snoop_dsbl : 1; + u64 bist_cf : 1; + u64 bist_tu : 1; + u64 bist_itag : 1; + u64 bist_itlbtag : 1; + u64 bist_itlbdata : 1; + u64 bist_idata_nm : 4; + u64 bist_idata_cnt : 10; + u64 pipe_frz_dsbl : 1; /* Since iset v5 */ + u64 rf_clean_dsbl : 1; + /* iset v6 */ + u64 virt_dsbl : 1; + u64 upt_sec_ad_shift_dsbl : 1; + u64 pdct_stat_enbl : 1; + u64 pdct_dyn_enbl : 1; + u64 pdct_rbr_enbl : 1; + u64 pdct_ret_enbl : 1; + u64 pdct_retst_enbl : 1; + u64 pdct_cond_enbl : 1; + }; + u64 word; +} e2k_cu_hw0_t; + + /* * Trap Info Registers */ @@ -2260,6 +2347,12 @@ typedef union { } e2k_dimcr_t; #define DIMCR_reg word +static inline bool dimcr_enabled(e2k_dimcr_t dimcr, int monitor) +{ + return (monitor == 0) ? (AS(dimcr)[0].user || AS(dimcr)[0].system) + : (AS(dimcr)[1].user || AS(dimcr)[1].system); +} + typedef union { struct { u32 b0 : 1; @@ -2356,24 +2449,6 @@ typedef struct e2k_svd_gregs_struct { u8 tag; /* any time too */ } e2k_svd_gregs_t; -/* CU_HW0 register */ -#define _CU_HW0_TRWM_ITAG_MASK 0x000000007 /* IB tag */ -#define _CU_HW0_TRWM_IDATA_MASK 0x000000038 /* IB data */ -#define _CU_HW0_TRWM_CF_MASK 0x0000001c0 /* Chain File */ -#define _CU_HW0_IB_SNOOP_DISABLE_MASK 0x000000200 /* Disable IB snooping */ -#define _CU_HW0_BIST_CF_MASK 0x000000400 /* Chain File */ -#define _CU_HW0_BIST_TU_MASK 0x000000800 /* Trap Unit */ -#define _CU_HW0_BIST_ITAG_MASK 0x000001000 /* IB tag */ -#define _CU_HW0_BIST_ITLB_TAG_MASK 0x000002000 /* ITLB tag */ -#define _CU_HW0_BIST_ITLB_DATA_MASK 0x000004000 /* ITLB data */ -#define _CU_HW0_BIST_IDATA_NM_MASK 0x000078000 /* IB data */ -#define _CU_HW0_BIST_IDATA_CNT_MASK 0x01ff80000 /* IB tag */ -#define _CU_HW0_PIPE_FROST_DISABLE_MASK 0x020000000 /* Instruction pipe */ -#define _CU_HW0_RF_CLEAN_DISABLE_MASK 0x040000000 /* Register File */ -#define _CU_HW0_VIRT_DISABLE_MASK 0x080000000 /* Disable hardware */ - /* virtualization support */ -#define _CU_HW0_UPT_SEC_AD_SHIFT_DSBL_MASK 0x100000000 /* Disable address shift in */ - /* MMU_CR.upt mode */ struct hw_stacks { e2k_psp_lo_t psp_lo; diff --git a/arch/e2k/include/asm/e2k_api.h b/arch/e2k/include/asm/e2k_api.h index db6948a..cebc5c9 100644 --- a/arch/e2k/include/asm/e2k_api.h +++ b/arch/e2k/include/asm/e2k_api.h @@ -3,6 +3,7 @@ #include #include +#include #ifndef __ASSEMBLY__ @@ -1330,12 +1331,30 @@ do { \ #define STORE_NV_MAS(_addr, _val, _mas, size_letter, clobber) \ do { \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE ("st" #size_letter" %[addr], %[val], mas=%[mas]" \ - : [addr] "=m" (*(_addr)) \ - : [val] "r" (_val), \ - [mas] "i" (_mas) \ - : clobber); \ + if ((_mas) == MAS_STORE_RELEASE_V6(MAS_MT_0) || \ + (_mas) == MAS_STORE_RELEASE_V6(MAS_MT_1)) { \ + _Pragma("no_asm_inline") \ + asm NOT_VOLATILE ( \ + ALTERNATIVE( \ + /* Default version */ \ + "{wait st_c=1, ld_c=1\n" \ + " st" #size_letter" %[addr], %[val]}", \ + /* CPU_NO_HWBUG_SOFT_WAIT version */ \ + "{st" #size_letter" %[addr], %[val], mas=%[mas]}", \ + %[facility]) \ + : 
[addr] "=m" (*(_addr)) \ + : [val] "r" (_val), \ + [mas] "i" (_mas), \ + [facility] "i" (CPU_NO_HWBUG_SOFT_WAIT) \ + : clobber); \ + } else { \ + _Pragma("no_asm_inline") \ + asm NOT_VOLATILE ("st" #size_letter" %[addr], %[val], mas=%[mas]" \ + : [addr] "=m" (*(_addr)) \ + : [val] "r" (_val), \ + [mas] "i" (_mas) \ + : clobber); \ + } \ } while (0) /* @@ -1352,12 +1371,12 @@ do { \ # define READ_MAS_BARRIER_AFTER(mas) \ do { \ if ((mas) == MAS_IOADDR) \ - E2K_WAIT_LD_C_LAL_SAL(); \ + __E2K_WAIT(_ld_c | _lal | _sal); \ } while (0) # define WRITE_MAS_BARRIER_BEFORE(mas) \ do { \ if ((mas) == MAS_IOADDR) \ - E2K_WAIT_ST_C_SAS_LD_C_SAL(); \ + __E2K_WAIT(_st_c | _sas | _ld_c | _sal); \ } while (0) /* * Not required by documentation, but this is how @@ -1366,7 +1385,7 @@ do { \ # define WRITE_MAS_BARRIER_AFTER(mas) \ do { \ if ((mas) == MAS_IOADDR) \ - E2K_WAIT_ST_C_SAS(); \ + __E2K_WAIT(_st_c | _sas); \ } while (0) #elif CONFIG_CPU_ISET == 0 @@ -1379,7 +1398,7 @@ do { \ # define WRITE_MAS_BARRIER_BEFORE(mas) \ do { \ if ((mas) == MAS_IOADDR) \ - __E2K_WAIT(_st_c | _ld_c); \ + __E2K_WAIT(_st_c | _sas | _ld_c | _sal); \ } while (0) /* * Not required by documentation, but this is how @@ -1388,7 +1407,7 @@ do { \ # define WRITE_MAS_BARRIER_AFTER(mas) \ do { \ if ((mas) == MAS_IOADDR) \ - __E2K_WAIT(_st_c); \ + __E2K_WAIT(_st_c | _sas); \ } while (0) #else @@ -1519,10 +1538,6 @@ do { \ }) -/* - * Prefetching with fully speculative load is - * needed when the passed address can be invalid. - */ #if !defined(CONFIG_BOOT_E2K) && !defined(E2K_P2V) # define E2K_PREFETCH_L2_SPEC(addr) \ do { \ @@ -1533,42 +1548,23 @@ do { \ "i" (MAS_LOAD_SPEC | MAS_BYPASS_L1_CACHE)); \ } while (0) -# define E2K_PREFETCH_L2_SPEC_OFFSET(addr, offset) \ +# define E2K_PREFETCH_L2_NOSPEC_OFFSET(addr, offset) \ do { \ int unused; \ - asm ("ldb,sm %1, %2, %%empty, mas=%3\n" \ - : "=r" (unused) \ - : "r" (addr), \ - "i" (offset), \ - "i" (MAS_LOAD_SPEC | MAS_BYPASS_L1_CACHE)); \ -} while (0) - -# define E2K_PREFETCH_L2_OFFSET(addr, offset) \ -do { \ - int unused; \ - asm ("ldb,sm %1, %2, %%empty, mas=%3\n" \ + asm ("ldb %1, %2, %%empty, mas=%3\n" \ : "=r" (unused) \ : "r" (addr), \ "i" (offset), \ "i" (MAS_BYPASS_L1_CACHE)); \ } while (0) -# define E2K_PREFETCH_L2_256(addr) \ +# define E2K_PREFETCH_L2_NOSPEC_256(addr) \ do { \ int unused; \ - asm ( "ldb,0,sm %1, 0, %%empty, mas=%2\n" \ - "ldb,2,sm %1, 64, %%empty, mas=%2\n" \ - "ldb,3,sm %1, 128, %%empty, mas=%2\n" \ - "ldb,5,sm %1, 192, %%empty, mas=%2" \ - : "=r" (unused) \ - : "r" (addr), \ - "i" (MAS_BYPASS_L1_CACHE)); \ -} while (0) - -# define E2K_PREFETCH_L2(addr) \ -do { \ - int unused; \ - asm ("ldb,sm %1, 0, %%empty, mas=%2" \ + asm ( "ldb,0 %1, 0, %%empty, mas=%2\n" \ + "ldb,2 %1, 64, %%empty, mas=%2\n" \ + "ldb,3 %1, 128, %%empty, mas=%2\n" \ + "ldb,5 %1, 192, %%empty, mas=%2" \ : "=r" (unused) \ : "r" (addr), \ "i" (MAS_BYPASS_L1_CACHE)); \ @@ -1583,6 +1579,14 @@ do { \ "i" (MAS_LOAD_SPEC)); \ } while (0) +# define E2K_PREFETCH_L1_NOSPEC(addr) \ +do { \ + int unused; \ + asm ("ldb %1, 0, %%empty" \ + : "=r" (unused) \ + : "r" (addr)); \ +} while (0) + # define E2K_PREFETCH_L1_SPEC_OFFSET(addr, offset) \ do { \ int unused; \ @@ -1592,49 +1596,15 @@ do { \ "i" (offset), \ "i" (MAS_LOAD_SPEC)); \ } while (0) - -# define E2K_PREFETCH_L1_OFFSET(addr, offset) \ -do { \ - int unused; \ - asm ("ldb,sm %1, %2, %%empty\n" \ - : "=r" (unused) \ - : "r" (addr), \ - "i" (offset)); \ -} while (0) - -# define E2K_PREFETCH_L1_256(addr) \ -do { \ - int unused; \ - asm ( "ldb,0,sm %1, 
0, %%empty\n" \ - "ldb,2,sm %1, 64, %%empty\n" \ - "ldb,3,sm %1, 128, %%empty\n" \ - "ldb,5,sm %1, 192, %%empty" \ - : "=r" (unused) \ - : "r" (addr)); \ -} while (0) - -# define E2K_PREFETCH_L1(addr) \ -do { \ - int unused; \ - asm ("ldb,3 %1, 0, %%empty" \ - : "=r" (unused) \ - : "r" (addr)); \ -} while (0) #else -# define E2K_PREFETCH_L2_SPEC_OFFSET(addr, offset) \ - do { (void) (addr); (void) (offset); } while (0) -# define E2K_PREFETCH_L2_OFFSET(addr, offset) \ +# define E2K_PREFETCH_L2_SPEC(addr) do { (void) (addr); } while (0) +# define E2K_PREFETCH_L2_NOSPEC_OFFSET(addr, offset) \ do { (void) (addr); (void) (offset); } while (0) +# define E2K_PREFETCH_L2_NOSPEC_256(addr) do { (void) (addr); } while (0) +# define E2K_PREFETCH_L1_SPEC(addr) do { (void) (addr); } while (0) +# define E2K_PREFETCH_L1_NOSPEC(addr) do { (void) (addr); } while (0) # define E2K_PREFETCH_L1_SPEC_OFFSET(addr, offset) \ do { (void) (addr); (void) (offset); } while (0) -# define E2K_PREFETCH_L1_OFFSET(addr, offset) \ - do { (void) (addr); (void) (offset); } while (0) -# define E2K_PREFETCH_L2_SPEC(addr) do { (void) (addr); } while (0) -# define E2K_PREFETCH_L2_256(addr) do { (void) (addr); } while (0) -# define E2K_PREFETCH_L2(addr) do { (void) (addr); } while (0) -# define E2K_PREFETCH_L1_SPEC(addr) do { (void) (addr); } while (0) -# define E2K_PREFETCH_L1_256(addr) do { (void) (addr); } while (0) -# define E2K_PREFETCH_L1(addr) do { (void) (addr); } while (0) #endif /* @@ -1743,20 +1713,22 @@ do { \ #define NATIVE_RECOVERY_LOAD_TO_THE_GREG_VR_ATOMIC_QP(_addr, _opc, \ greg_no, _vr) \ do { \ - u64 val; \ - u64 __opc = (_opc); \ + u64 tmp, __opc = (_opc); \ + /* #133760 Use a real quadro register when repeating atomic load */ \ asm ( "{disp %%ctpr1, qpswitchd_sm\n" \ " nop 4\n" \ - " ldrd,0 [ %[addr] + %[opc] ], %[val]\n" \ - " ldrd,2 [ %[addr] + %[opc_8] ], %%g" #greg_no "\n" \ + " ldrd,0 [ %[addr] + %[opc] ], %%db[0]\n" \ + " ldrd,2 [ %[addr] + %[opc_8] ], %%db[1]\n" \ " cmpesb,1 %[vr], 0, %%pred19}\n" \ - "{movts,0 %%g" #greg_no ", %[val] ? %%pred19\n" \ + "{movts,0 %%g" #greg_no ", %%b[0] ? %%pred19\n" \ + " movtd,1 %%db[1], %%dg" #greg_no "}\n" \ + "{movtd,0 %%db[0], %[tmp]\n" \ " addd,2 %[greg], 0, %%db[0]\n" \ " call %%ctpr1, wbs=%#}\n" \ - "{movtd %[val], %%dg" #greg_no "}\n" \ - : [val] "=&r" (val) \ - : [addr] "r" (_addr), [vr] "ir" ((u32) (_vr)), \ - [opc] "r" (__opc), [opc_8] "r" (__opc | 8ull), \ + "{movtd,0 %[tmp], %%dg" #greg_no "}\n" \ + : [tmp] "=&r" (tmp) \ + : [opc] "r" (__opc), [opc_8] "r" (__opc | 8ull), \ + [addr] "r" (_addr), [vr] "ir" ((u32) (_vr)), \ [greg] "i" ((u64) (greg_no)) \ : "call", "memory", "pred19", "g" #greg_no); \ } while (false) @@ -1764,36 +1736,37 @@ do { \ #define NATIVE_RECOVERY_LOAD_TO_THE_GREG_VR_ATOMIC_QP_OR_Q(_addr, _opc, \ greg_no_lo, greg_no_hi, _vr, _qp_load) \ do { \ - u64 val; \ - u64 __opc = (_opc); \ + u64 tmp, __opc = (_opc); \ + /* #133760 Use a real quadro register when repeating atomic load */ \ if (_qp_load) { \ asm ( "{disp %%ctpr1, qpswitchd_sm\n" \ " nop 4\n" \ - " ldrd,0 [ %[addr] + %[opc] ], %[val]\n" \ - " ldrd,2 [ %[addr] + %[opc_8] ], %%g" #greg_no_lo "\n" \ + " ldrd,0 [ %[addr] + %[opc] ], %%db[0]\n" \ + " ldrd,2 [ %[addr] + %[opc_8] ], %%db[1]\n" \ " cmpesb,1 %[vr], 0, %%pred19}\n" \ - "{movts,0 %%g" #greg_no_lo ", %[val] ? %%pred19\n" \ + "{movts,0 %%g" #greg_no_lo ", %%b[0] ? 
%%pred19\n" \ + " movtd,1 %%db[1], %%dg" #greg_no_lo "}\n" \ + "{movtd,0 %%db[0], %[tmp]\n" \ " addd,2 %[greg], 0, %%db[0]\n" \ " call %%ctpr1, wbs=%#}\n" \ - "{movtd %[val], %%dg" #greg_no_lo "}\n" \ - : [val] "=&r" (val) \ + "{movtd %[tmp], %%dg" #greg_no_lo "}\n" \ + : [tmp] "=&r" (tmp) \ : [addr] "r" (_addr), [vr] "ir" ((u32) (_vr)), \ [opc] "r" (__opc), [opc_8] "r" (__opc | 8ull), \ [greg] "i" ((u64) (greg_no_lo)) \ : "call", "memory", "pred19", "g" #greg_no_lo); \ } else { \ asm ( "{nop 4\n" \ - " ldrd,0 [ %[addr] + %[opc] ], %[val]\n" \ + " ldrd,0 [ %[addr] + %[opc] ], %%g" #greg_no_lo "\n" \ " ldrd,2 [ %[addr] + %[opc_8] ], %%g" #greg_no_hi "\n" \ - " cmpesb,1 %[vr], 0, %%pred19}\n" \ - "{nop 1\n" \ - " movts,0 %%g" #greg_no_lo ", %[val] ? %%pred19}\n" \ - "{movtd,0 %[val], %%dg" #greg_no_lo "}\n" \ - : [val] "=&r" (val) \ + " movts,1 %%g" #greg_no_lo ", %[tmp]\n" \ + " cmpesb,4 %[vr], 0, %%pred19}\n" \ + "{movts,0 %[tmp], %%g" #greg_no_lo " ? %%pred19}\n" \ + : [tmp] "=&r" (tmp) \ : [addr] "r" (_addr), [vr] "ir" ((u32) (_vr)), \ [opc] "r" (__opc), [opc_8] "r" (__opc | 8ull), \ [greg] "i" ((u64) (greg_no_lo)) \ - : "call", "memory", "pred19", "g" #greg_no_lo); \ + : "memory", "pred19", "g" #greg_no_lo, "g" #greg_no_hi); \ } \ } while (false) @@ -2290,7 +2263,10 @@ do { \ #define NATIVE_MOVE_TAGGED_DWORD_WITH_OPC_VR_ATOMIC(_from, _to, _to_hi, \ _vr, _opc) \ do { \ - u64 prev, val, val_8; \ + u64 prev; \ + /* #133760 Use a real quadro register when repeating atomic load */ \ + register u64 val asm("%b[0]"); \ + register u64 val_8 asm("%b[1]"); \ u64 __opc = (_opc); \ asm ( "{cmpesb %[vr], 0, %%pred19}\n" \ "{ldrd,0 [ %[from] + %[opc] ], %[val]\n" \ @@ -4629,158 +4605,104 @@ do { \ #if !defined CONFIG_E2K_MACHINE || \ defined CONFIG_E2K_ES2_DSP || defined CONFIG_E2K_ES2_RU || \ (defined CONFIG_E2K_E2S && defined CONFIG_NUMA) - # define WORKAROUND_WAIT_HWBUG(num) (((num) & (_st_c | _all_c | _sas)) ? 
\ ((num) | _ma_c) : (num)) -# define E2K_WAIT_ST_C_SAS() E2K_WAIT(_st_c) -# define E2K_WAIT_ST_C_SAS_MT() E2K_WAIT(_st_c) -# define E2K_WAIT_LD_C_LAL() E2K_WAIT(_ld_c) -# define E2K_WAIT_LD_C_LAL_MT() E2K_WAIT(_ld_c) -# define E2K_WAIT_LD_C_LAL_SAL() E2K_WAIT(_ld_c) -# define E2K_WAIT_ST_C_SAS_LD_C_SAL() E2K_WAIT(_st_c | _ld_c) -# define E2K_WAIT_ST_C_SAS_LD_C_SAL_MT() E2K_WAIT(_st_c | _ld_c) - #else - # define WORKAROUND_WAIT_HWBUG(num) num - -/* BUG 79245 - use .word to encode relaxed barriers */ -# define E2K_WAIT_ST_C_SAS() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x30000084\n" \ - : "=r" (unused) :: "memory"); \ -}) -# define E2K_WAIT_LD_C_LAL() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x30000408\n" \ - : "=r" (unused) :: "memory"); \ -}) -# define E2K_WAIT_ST_C_SAS_MT() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x30000884\n" \ - : "=r" (unused) :: "memory"); \ -}) -# define E2K_WAIT_LD_C_LAL_SAL() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x30000508\n" \ - : "=r" (unused) :: "memory"); \ -}) -# define E2K_WAIT_LD_C_LAL_MT() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x30000c08\n" \ - : "=r" (unused) :: "memory"); \ -}) -# define E2K_WAIT_ST_C_SAS_LD_C_SAL() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x3000018c\n" \ - : "=r" (unused) :: "memory"); \ -}) -# define E2K_WAIT_ST_C_SAS_LD_C_SAL_MT() \ -({ \ - int unused; \ - _Pragma("no_asm_inline") \ - asm NOT_VOLATILE (".word 0x00008001\n" \ - ".word 0x3000098c\n" \ - : "=r" (unused) :: "memory"); \ -}) #endif -#define E2K_WAIT_V6(_num) \ -({ \ - int unused, num = WORKAROUND_WAIT_HWBUG(_num); \ - /* "trap=1" requires special handling, see C1_wait_trap() */ \ - asm NOT_VOLATILE("{wait mem_mod=%[mem_mod], int=%[intr], mt=%[mt], " \ - " lal=%[lal], las=%[las], sal=%[sal], sas=%[sas], " \ - " ma_c=%[ma_c], fl_c=%[fl_c], ld_c = %[ld_c], " \ - " st_c=%[st_c], all_e=%[all_e], all_c=%[all_c]}"\ - : "=r" (unused) \ - : [all_c] "i" (((num) & 0x1)), \ - [all_e] "i" (((num) & 0x2) >> 1), \ - [st_c] "i" (((num) & 0x4) >> 2), \ - [ld_c] "i" (((num) & 0x8) >> 3), \ - [fl_c] "i" (((num) & 0x10) >> 4), \ - [ma_c] "i" (((num) & 0x20) >> 5), \ - [sas] "i" (((num) & 0x80) >> 7), \ - [sal] "i" (((num) & 0x100) >> 8), \ - [las] "i" (((num) & 0x200) >> 9), \ - [lal] "i" (((num) & 0x400) >> 10), \ - [mt] "i" (((num) & 0x800) >> 11), \ - [intr] "i" (((num) & 0x1000) >> 12), \ - [mem_mod] "i" (((num) & 0x2000) >> 13) \ - : "memory" ); \ - if ((num & (_all_c | _ma_c | _lal | _las)) || \ - (num & _ld_c) && !(num & _sal) || \ - (num & _st_c) && !(num & _sas)) \ - NATIVE_HWBUG_AFTER_LD_ACQ(); \ -}) - - -#define E2K_WAIT_V5(_num) \ -({ \ - int unused, num = WORKAROUND_WAIT_HWBUG(_num); \ - /* "trap=1" requires special handling, see C1_wait_trap() */ \ - asm NOT_VOLATILE ("{wait sal=%[sal], sas=%[sas], ma_c=%[ma_c], " \ - " fl_c=%[fl_c], ld_c=%[ld_c], st_c=%[st_c], " \ - " all_e=%[all_e], all_c=%[all_c]}" \ - : "=r" (unused) \ - : [all_c] "i" (((num) & 0x1)), \ - [all_e] "i" (((num) & 0x2) >> 1), \ - [st_c] "i" (((num) & 0x4) >> 2), \ - [ld_c] "i" (((num) & 0x8) >> 3), \ - [fl_c] "i" (((num) & 0x10) >> 4), \ - [ma_c] "i" (((num) & 0x20) >> 5), \ - [sas] "i" (((num) & 0x80) >> 7), \ - [sal] "i" 
(((num) & 0x100) >> 8) \ - : "memory" ); \ - if ((num & (_all_c | _ma_c)) || \ - (num & _ld_c) && !(num & _sal) || \ - (num & _st_c) && !(num & _sas)) \ - NATIVE_HWBUG_AFTER_LD_ACQ(); \ -}) - #define __E2K_WAIT(_num) \ -({ \ +do { \ int unused, num = WORKAROUND_WAIT_HWBUG(_num); \ - if ((_num) & ~(_st_c | _ld_c)) \ + instr_cs1_t cs1 = { \ + .opc = CS1_OPC_WAIT, \ + .param = num \ + }; \ + \ + /* Use "asm volatile" around tricky barriers such as _ma_c, _fl_c, etc */ \ + if ((_num) & ~(_st_c | _ld_c | _sas | _sal | _las | _lal | _mt)) \ asm volatile ("" ::: "memory"); \ - asm NOT_VOLATILE ("{wait ma_c=%6, fl_c=%5, " \ - "ld_c = %4, st_c=%3, all_e=%2, all_c=%1}" \ - : "=r" (unused) \ - : "i" (((num) & 0x1)), \ - "i" (((num) & 0x2) >> 1), \ - "i" (((num) & 0x4) >> 2), \ - "i" (((num) & 0x8) >> 3), \ - "i" (((num) & 0x10) >> 4), \ - "i" (((num) & 0x20) >> 5) \ - : "memory" ); \ - if ((_num) & ~(_st_c | _ld_c)) \ + \ + /* CPU_NO_HWBUG_SOFT_WAIT: use faster workaround for "lal" barriers */ \ + if ((_num) == (_ld_c | _lal) || (_num) == (_ld_c | _lal | _mt)) { \ + _Pragma("no_asm_inline") \ + asm NOT_VOLATILE (ALTERNATIVE( \ + /* Default version - add nop 5 */ \ + ".word 0x00008281\n" \ + ".word %[cs1]\n", \ + /* CPU_NO_HWBUG_SOFT_WAIT version */ \ + ".word 0x00008001\n" \ + ".word %[cs1]\n", \ + %[facility]) \ + : "=r" (unused) \ + : [cs1] "i" (cs1.word), \ + [facility] "i" (CPU_NO_HWBUG_SOFT_WAIT) \ + : "memory"); \ + } else { \ + instr_cs1_t cs1_no_soft_barriers = { \ + .opc = CS1_OPC_WAIT, \ + .param = num & ~(_lal | _las | _sal | _sas) \ + }; \ + /* #79245 - use .word to encode relaxed barriers */ \ + _Pragma("no_asm_inline") \ + asm NOT_VOLATILE (ALTERNATIVE( \ + /* Default version */ \ + ".word 0x00008001\n" \ + ".word %[cs1_no_soft_barriers]\n", \ + /* CPU_NO_HWBUG_SOFT_WAIT version - use soft barriers */ \ + ".word 0x00008001\n" \ + ".word %[cs1]\n", \ + %[facility]) \ + : "=r" (unused) \ + : [cs1] "i" (cs1.word), \ + [cs1_no_soft_barriers] "i" (cs1_no_soft_barriers.word), \ + [facility] "i" (CPU_NO_HWBUG_SOFT_WAIT) \ + : "memory"); \ + } \ + \ + /* Use "asm volatile" around tricky barriers such as _ma_c */ \ + if ((_num) & ~(_st_c | _ld_c | _sas | _sal | _las | _lal | _mt)) \ asm volatile ("" ::: "memory"); \ -}) +} while (0) #define E2K_WAIT(num) \ -({ \ +do { \ __E2K_WAIT(num); \ if (num & (_st_c | _ld_c | _all_c | _ma_c)) \ NATIVE_HWBUG_AFTER_LD_ACQ(); \ -}) +} while (0) + +#define _mem_mod 0x2000 /* watch for modification */ +#define _int 0x1000 /* stop the conveyor untill interrupt */ +#define _mt 0x800 +#define _lal 0x400 /* load-after-load modifier for _ld_c */ +#define _las 0x200 /* load-after-store modifier for _st_c */ +#define _sal 0x100 /* store-after-load modifier for _ld_c */ +#define _sas 0x80 /* store-after-store modifier for _st_c */ +/* "trap=1" requires special handling, see C1_wait_trap() so don't + * define it here, as using it in E2K_WAIT() makes no sense. */ +#define _ma_c 0x20 /* stop until all memory operations complete */ +#define _fl_c 0x10 /* stop until TLB/cache flush operations complete */ +#define _ld_c 0x8 /* stop until all load operations complete */ +#define _st_c 0x4 /* stop until all store operations complete */ +#define _all_e 0x2 /* stop until prev. operations issue all exceptions */ +#define _all_c 0x1 /* stop until prev. operations complete */ + +/* + * IMPORTANT NOTE!!! + * Do not add 'sas' and 'sal' here, as they are modifiers + * for st_c/ld_c which make them _less_ restrictive. 
+ */ +#define E2K_WAIT_OP_ALL_MASK (_ma_c | _fl_c | _ld_c | _st_c | _all_c | _all_e) + +#define E2K_WAIT_MA E2K_WAIT(_ma_c) +#define E2K_WAIT_FLUSH E2K_WAIT(_fl_c) +#define E2K_WAIT_LD E2K_WAIT(_ld_c) +#define E2K_WAIT_ST E2K_WAIT(_st_c) +#define E2K_WAIT_ALL_OP E2K_WAIT(_all_c) +#define E2K_WAIT_ALL_EX E2K_WAIT(_all_e) +#define E2K_WAIT_ALL E2K_WAIT(E2K_WAIT_OP_ALL_MASK) +#define __E2K_WAIT_ALL __E2K_WAIT(E2K_WAIT_OP_ALL_MASK) /* Wait for the load to finish before issuing * next memory loads/stores. */ @@ -4794,64 +4716,6 @@ do { \ NATIVE_HWBUG_AFTER_LD_ACQ(); \ } while (0) -/* - * CPU 'WAIT' operation fields structure - */ -#define E2K_WAIT_OP_MA_C_MASK 0x20 /* wait for all previous memory */ - /* access operatons complete */ -#define E2K_WAIT_OP_FL_C_MASK 0x10 /* wait for all previous flush */ - /* cache operatons complete */ -#define E2K_WAIT_OP_LD_C_MASK 0x08 /* wait for all previous load */ - /* operatons complete */ -#define E2K_WAIT_OP_ST_C_MASK 0x04 /* wait for all previous store */ - /* operatons complete */ -#define E2K_WAIT_OP_ALL_E_MASK 0x02 /* wait for all previous operatons */ - /* issue all possible exceptions */ -#define E2K_WAIT_OP_ALL_C_MASK 0x01 /* wait for all previous operatons */ - /* complete */ -#define E2K_WAIT_OP_ALL_MASK (E2K_WAIT_OP_MA_C_MASK | \ - E2K_WAIT_OP_FL_C_MASK | \ - E2K_WAIT_OP_LD_C_MASK | \ - E2K_WAIT_OP_ST_C_MASK | \ - E2K_WAIT_OP_ALL_C_MASK | \ - E2K_WAIT_OP_ALL_E_MASK) - -#define E2K_WAIT_MA E2K_WAIT(E2K_WAIT_OP_MA_C_MASK) -#define E2K_WAIT_FLUSH E2K_WAIT(E2K_WAIT_OP_FL_C_MASK) -#define E2K_WAIT_LD E2K_WAIT(E2K_WAIT_OP_LD_C_MASK) -#define E2K_WAIT_ST E2K_WAIT(E2K_WAIT_OP_ST_C_MASK) -#define E2K_WAIT_ALL_OP E2K_WAIT(E2K_WAIT_OP_ALL_C_MASK) -#define E2K_WAIT_ALL_EX E2K_WAIT(E2K_WAIT_OP_ALL_E_MASK) -#define E2K_WAIT_ALL E2K_WAIT(E2K_WAIT_OP_ALL_MASK) -#define __E2K_WAIT_ALL __E2K_WAIT(E2K_WAIT_OP_ALL_MASK) - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - */ - -#define _mem_mod 0x2000 /* watch for modification */ -#define _int 0x1000 /* stop the conveyor untill interrupt */ -#define _mt 0x800 -#define _lal 0x400 /* load-after-load modifier for _ld_c */ -#define _las 0x200 /* load-after-store modifier for _st_c */ -#define _sal 0x100 /* store-after-load modifier for _ld_c */ -#define _sas 0x80 /* store-after-store modifier for _st_c */ -#define _trap 0x40 /* stop the conveyor untill interrupt */ -#define _ma_c 0x20 -#define _fl_c 0x10 /* stop until TLB/cache flush operations complete */ -#define _ld_c 0x8 /* stop until all load operations complete */ -#define _st_c 0x4 /* stop until store operations complete */ -#define _all_e 0x2 -#define _all_c 0x1 - #define E2K_FLUSHTS \ do { \ _Pragma("no_asm_inline") \ @@ -5282,6 +5146,8 @@ do { \ #define E2K_JUMP(func) E2K_JUMP_WITH_ARGUMENTS(func, 0) +#define E2K_JUMP_WITH_ARG(func, arg) E2K_JUMP_WITH_ARGUMENTS(func, 1, arg) + #define E2K_JUMP_WITH_ARGUMENTS(func, num_args, ...) \ __E2K_JUMP_WITH_ARGUMENTS_##num_args(func, ##__VA_ARGS__) @@ -6212,7 +6078,7 @@ do { \ "mas=%[mas] ? 
%%pred23}\n" \ : \ : [addr] "r" (_addr), [fmt] "r" (_fmt), \ - [ind] "r" (_ind), [mas] "r" (_mas) \ + [ind] "r" (_ind), [mas] "i" (_mas) \ : "memory", "pred20", "pred21", "pred22", "pred23", \ "g" #_greg_no \ ); \ @@ -6358,7 +6224,7 @@ do { \ : [data] "=&r" (_data) \ : [from] "r" (_from), [to] "r" (_to), \ [fmt] "r" (_fmt), [ind] "r" (_ind), \ - [first_time] "r" (_first_time), [mas] "r" (_mas) \ + [first_time] "r" (_first_time), [mas] "i" (_mas) \ : "memory", "pred19", "pred20", "pred21", "pred22", "pred23" \ ); \ } while (0) @@ -6787,250 +6653,6 @@ do { \ unreachable(); \ } while (0) - -typedef unsigned long long __e2k_syscall_arg_t; - -#define E2K_SYSCALL_CLOBBERS \ - "ctpr1", "ctpr2", "ctpr3", \ - "b[0]", "b[1]", "b[2]", "b[3]", \ - "b[4]", "b[5]", "b[6]", "b[7]" - -/* Transaction operation transaction of argument type - * __e2k_syscall_arg_t */ -#ifdef __ptr64__ -#define __E2K_SYSCAL_ARG_ADD "addd,s" -#else -#define __E2K_SYSCAL_ARG_ADD "adds,s" -#endif - -#define __E2K_SYSCALL_0(_trap, _sys_num, _arg1) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - "}\n" \ - "call %%ctpr1, wbs = %#\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_1(_trap, _sys_num, _arg1) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - "}\n" \ - "call %%ctpr1, wbs = %#\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_2(_trap, _sys_num, _arg1, _arg2) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - "}\n" \ - "call %%ctpr1, wbs = %#\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_3(_trap, _sys_num, _arg1, _arg2, _arg3) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg3], %%b[3]\n\t" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - "}\n" \ - "call %%ctpr1, wbs = %#\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)), \ - [arg3] "ri" ((__e2k_syscall_arg_t) (_arg3)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_4(_trap, _sys_num, _arg1, _arg2, _arg3, _arg4) \ -({ \ - register 
__e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg3], %%b[3]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg4], %%b[4]\n\t" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - "}\n" \ - "call %%ctpr1, wbs = %#\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)), \ - [arg3] "ri" ((__e2k_syscall_arg_t) (_arg3)), \ - [arg4] "ri" ((__e2k_syscall_arg_t) (_arg4)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_5(_trap, _sys_num, _arg1, _arg2, _arg3, _arg4, _arg5) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg3], %%b[3]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg4], %%b[4]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg5], %%b[5]\n\t" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - "}\n" \ - "call %%ctpr1, wbs = %#\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)), \ - [arg3] "ri" ((__e2k_syscall_arg_t) (_arg3)), \ - [arg4] "ri" ((__e2k_syscall_arg_t) (_arg4)), \ - [arg5] "ri" ((__e2k_syscall_arg_t) (_arg5)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_6(_trap, _sys_num, _arg1, \ - _arg2, _arg3, _arg4, _arg5, _arg6) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg3], %%b[3]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg4], %%b[4]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg5], %%b[5]\n\t" \ - "}\n" \ - "{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg6], %%b[6]\n\t" \ - "call %%ctpr1, wbs = %#\n\t" \ - "}\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)), \ - [arg3] "ri" ((__e2k_syscall_arg_t) (_arg3)), \ - [arg4] "ri" ((__e2k_syscall_arg_t) (_arg4)), \ - [arg5] "ri" ((__e2k_syscall_arg_t) (_arg5)), \ - [arg6] "ri" ((__e2k_syscall_arg_t) (_arg6)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define __E2K_SYSCALL_7(_trap, _sys_num, _arg1, \ - _arg2, _arg3, _arg4, _arg5, _arg6, _arg7) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ("{\n" \ - "sdisp %%ctpr1, %[trap]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[sys_num], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg3], %%b[3]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg4], %%b[4]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg5], %%b[5]\n\t" \ - "}\n" \ - "{\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg6], %%b[6]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, 
%[arg7], %%b[7]\n\t" \ - "call %%ctpr1, wbs = %#\n\t" \ - "}\n" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]" \ - : [res] "=r" (__res) \ - : [trap] "i" ((int) (_trap)), \ - [sys_num] "ri" ((__e2k_syscall_arg_t) (_sys_num)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)), \ - [arg3] "ri" ((__e2k_syscall_arg_t) (_arg3)), \ - [arg4] "ri" ((__e2k_syscall_arg_t) (_arg4)), \ - [arg5] "ri" ((__e2k_syscall_arg_t) (_arg5)), \ - [arg6] "ri" ((__e2k_syscall_arg_t) (_arg6)), \ - [arg7] "ri" ((__e2k_syscall_arg_t) (_arg7)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - -#define E2K_SYSCALL(trap, sys_num, num_args, args...) \ - __E2K_SYSCALL_##num_args(trap, sys_num, args) - -#define ASM_CALL_8_ARGS(func_name_to_call, _arg0, _arg1, _arg2, _arg3, \ - _arg4, _arg5, _arg6, _arg7) \ -({ \ - register __e2k_syscall_arg_t __res; \ - asm volatile ( \ - "{\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg0], %%b[0]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg1], %%b[1]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg2], %%b[2]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg3], %%b[3]\n\t" \ - "disp %%ctpr1, " #func_name_to_call "\n\t" \ - "}\n\t" \ - "{\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg4], %%b[4]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg5], %%b[5]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg6], %%b[6]\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %[arg7], %%b[7]\n\t" \ - "call %%ctpr1, wbs = %#\n\t" \ - "}\n\t" \ - __E2K_SYSCAL_ARG_ADD " 0x0, %%b[0], %[res]\n\t" \ - : \ - [res] "=r" (__res) \ - : \ - [arg0] "ri" ((__e2k_syscall_arg_t) (_arg0)), \ - [arg1] "ri" ((__e2k_syscall_arg_t) (_arg1)), \ - [arg2] "ri" ((__e2k_syscall_arg_t) (_arg2)), \ - [arg3] "ri" ((__e2k_syscall_arg_t) (_arg3)), \ - [arg4] "ri" ((__e2k_syscall_arg_t) (_arg4)), \ - [arg5] "ri" ((__e2k_syscall_arg_t) (_arg5)), \ - [arg6] "ri" ((__e2k_syscall_arg_t) (_arg6)), \ - [arg7] "ri" ((__e2k_syscall_arg_t) (_arg7)) \ - : E2K_SYSCALL_CLOBBERS); \ - __res; \ -}) - #define __arch_this_cpu_read(_var, size) \ ({ \ typeof(_var) __ret; \ diff --git a/arch/e2k/include/asm/e2k_syswork.h b/arch/e2k/include/asm/e2k_syswork.h index f02d7fb..1a0b15a 100644 --- a/arch/e2k/include/asm/e2k_syswork.h +++ b/arch/e2k/include/asm/e2k_syswork.h @@ -92,7 +92,7 @@ extern void ____trace_bprintk_fixed_args(unsigned long ip, long do_longjmp(u64 retval, u64 jmp_sigmask, e2k_cr0_hi_t jmp_cr0_hi, e2k_cr1_lo_t jmp_cr1_lo, e2k_pcsp_lo_t jmp_pcsp_lo, e2k_pcsp_hi_t jmp_pcsp_hi, u32 jmp_br, u32 jmp_psize, - u32 fpcr, u32 fpsr, u32 pfpfr, bool restore_fpu); + e2k_fpcr_t fpcr, e2k_fpsr_t fpsr, e2k_pfpfr_t pfpfr, bool restore_fpu); long write_current_chain_stack(unsigned long dst, void __user *buf, unsigned long size); diff --git a/arch/e2k/include/asm/fast_syscalls.h b/arch/e2k/include/asm/fast_syscalls.h index 3995c77..b210806 100644 --- a/arch/e2k/include/asm/fast_syscalls.h +++ b/arch/e2k/include/asm/fast_syscalls.h @@ -240,7 +240,7 @@ FAST_SYS_CLOCK_GETTIME(const clockid_t which_clock, struct timespec __user *tp) struct thread_info *const ti = READ_CURRENT_REG(); int r; - prefetchw(&fsys_data); + prefetch_nospec(&fsys_data); #ifdef CONFIG_KVM_HOST_MODE if (unlikely(test_ti_status_flag(ti, TS_HOST_AT_VCPU_MODE))) diff --git a/arch/e2k/include/asm/head.h b/arch/e2k/include/asm/head.h index 629c82b..5785692 100644 --- a/arch/e2k/include/asm/head.h +++ b/arch/e2k/include/asm/head.h @@ -267,40 +267,4 @@ */ #define E2K_KERNEL_CONTEXT 0x000 -/* - * CPU 'WAIT' operation fields structure - */ -#define E2K_WAIT_OP_MA_C_MASK 0x20 /* wait for all previous 
memory */ - /* access operatons complete */ -#define E2K_WAIT_OP_FL_C_MASK 0x10 /* wait for all previous flush */ - /* cache operatons complete */ -#define E2K_WAIT_OP_LD_C_MASK 0x08 /* wait for all previous load */ - /* operatons complete */ -#define E2K_WAIT_OP_ST_C_MASK 0x04 /* wait for all previous store */ - /* operatons complete */ -#define E2K_WAIT_OP_ALL_E_MASK 0x02 /* wait for all previous operatons */ - /* issue all possible exceptions */ -#define E2K_WAIT_OP_ALL_C_MASK 0x01 /* wait for all previous operatons */ - /* complete */ - -/* - * IMPORTANT NOTE!!! - * Do not add 'sas' and 'sal' here, as they are modifiers - * for st_c/ld_c which make them _less_ restrictive. - */ -#define E2K_WAIT_OP_ALL_MASK (E2K_WAIT_OP_MA_C_MASK | \ - E2K_WAIT_OP_FL_C_MASK | \ - E2K_WAIT_OP_LD_C_MASK | \ - E2K_WAIT_OP_ST_C_MASK | \ - E2K_WAIT_OP_ALL_C_MASK | \ - E2K_WAIT_OP_ALL_E_MASK) - -#define E2K_WAIT_MA E2K_WAIT(E2K_WAIT_OP_MA_C_MASK) -#define E2K_WAIT_FLUSH E2K_WAIT(E2K_WAIT_OP_FL_C_MASK) -#define E2K_WAIT_LD E2K_WAIT(E2K_WAIT_OP_LD_C_MASK) -#define E2K_WAIT_ST E2K_WAIT(E2K_WAIT_OP_ST_C_MASK) -#define E2K_WAIT_ALL_OP E2K_WAIT(E2K_WAIT_OP_ALL_C_MASK) -#define E2K_WAIT_ALL_EX E2K_WAIT(E2K_WAIT_OP_ALL_E_MASK) -#define E2K_WAIT_ALL E2K_WAIT(E2K_WAIT_OP_ALL_MASK) - #endif /* !(_E2K_HEAD_H) */ diff --git a/arch/e2k/include/asm/hw_stacks.h b/arch/e2k/include/asm/hw_stacks.h index a90abfb..500a2a1 100644 --- a/arch/e2k/include/asm/hw_stacks.h +++ b/arch/e2k/include/asm/hw_stacks.h @@ -680,6 +680,10 @@ put_crs(e2k_mem_crs_t *crs, e2k_addr_t base, e2k_addr_t cr_ind) return ret; } +extern int chain_stack_frame_init(e2k_mem_crs_t *crs, void *fn_ptr, + size_t dstack_size, e2k_psr_t psr, + int wbs, int wpsz, bool user); + extern void __update_psp_regs(unsigned long base, unsigned long size, unsigned long new_fp, e2k_psp_lo_t *psp_lo, e2k_psp_hi_t *psp_hi); diff --git a/arch/e2k/include/asm/io.h b/arch/e2k/include/asm/io.h index bc59709..43e46bb 100644 --- a/arch/e2k/include/asm/io.h +++ b/arch/e2k/include/asm/io.h @@ -192,11 +192,11 @@ static inline void native_writeq(u64 value, volatile void __iomem *addr) */ #if CONFIG_CPU_ISET >= 6 -# define __io_par() E2K_WAIT_V6(_ld_c | _sal | _lal) -# define __io_pbw() E2K_WAIT_V6(_st_c | _sas | _ld_c | _sal) +# define __io_par() __E2K_WAIT(_ld_c | _sal | _lal) +# define __io_pbw() __E2K_WAIT(_st_c | _sas | _ld_c | _sal) /* Not required by documentation, but this is how * x86 works and how most of the drivers are tested. 
*/ -# define __io_paw() E2K_WAIT_V6(_st_c | _sas) +# define __io_paw() __E2K_WAIT(_st_c | _sas) #else # define __io_par() \ do { \ diff --git a/arch/e2k/include/asm/kvm/boot_spinlock_slow.h b/arch/e2k/include/asm/kvm/boot_spinlock_slow.h index 17bdaec..bde9c7e 100644 --- a/arch/e2k/include/asm/kvm/boot_spinlock_slow.h +++ b/arch/e2k/include/asm/kvm/boot_spinlock_slow.h @@ -38,6 +38,7 @@ extern int kvm_boot_spin_locked_slow(struct kvm_vcpu *vcpu, void *lock); extern int kvm_boot_spin_unlock_slow(struct kvm_vcpu *vcpu, void *lock, bool add_to_unlock); +extern int vcpu_boot_spinlock_init(struct kvm_vcpu *vcpu); extern int kvm_boot_spinlock_init(struct kvm *kvm); extern void kvm_boot_spinlock_destroy(struct kvm *kvm); diff --git a/arch/e2k/include/asm/kvm/cpu_regs_access.h b/arch/e2k/include/asm/kvm/cpu_regs_access.h index 2c82ad3..3b47ead 100644 --- a/arch/e2k/include/asm/kvm/cpu_regs_access.h +++ b/arch/e2k/include/asm/kvm/cpu_regs_access.h @@ -914,6 +914,7 @@ * Read/write double-word Loop Status Register (LSR) */ #define KVM_READ_LSR_REG_VALUE() GUEST_GET_CPU_DSREG(LSR) +#define KVM_READ_LSR1_REG_VALUE() GUEST_GET_CPU_DSREG(LSR1) #define KVM_WRITE_LSR_REG_VALUE(LSR_value) \ GUEST_SET_CPU_DSREG(LSR, LSR_value) @@ -922,6 +923,7 @@ * Read/write double-word Initial Loop Counters Register (ILCR) */ #define KVM_READ_ILCR_REG_VALUE() GUEST_GET_CPU_DSREG(ILCR) +#define KVM_READ_ILCR1_REG_VALUE() GUEST_GET_CPU_DSREG(ILCR1) #define KVM_WRITE_ILCR_REG_VALUE(ILCR_value) \ GUEST_SET_CPU_DSREG(ILCR, ILCR_value) @@ -1069,11 +1071,11 @@ #define KVM_READ_DIMAR1_REG_VALUE() NATIVE_GET_DSREG_OPEN(dimar1) #define KVM_WRITE_DIBCR_REG_VALUE(DIBCR_value) \ - NATIVE_SET_SREG_CLOSED_NOEXC(dibcr, DIBCR_value, 4) + GUEST_SET_CPU_SREG(DIBCR, DIBCR_value) #define KVM_WRITE_DIBSR_REG_VALUE(DIBSR_value) \ NATIVE_SET_SREG_CLOSED_NOEXC(dibsr, DIBSR_value, 4) #define KVM_WRITE_DIMCR_REG_VALUE(DIMCR_value) \ - NATIVE_SET_DSREG_CLOSED_NOEXC(dimcr, DIMCR_value, 4) + GUEST_SET_CPU_DSREG(DIMCR, DIMCR_value) #define KVM_WRITE_DIBAR0_REG_VALUE(DIBAR0_value) \ NATIVE_SET_DSREG_CLOSED_NOEXC(dibar0, DIBAR0_value, 4) #define KVM_WRITE_DIBAR1_REG_VALUE(DIBAR1_value) \ diff --git a/arch/e2k/include/asm/kvm/debug.h b/arch/e2k/include/asm/kvm/debug.h index 9c99d45..4c31794 100644 --- a/arch/e2k/include/asm/kvm/debug.h +++ b/arch/e2k/include/asm/kvm/debug.h @@ -12,6 +12,7 @@ #include extern bool kvm_debug; +extern bool kvm_ftrace_dump; /* * Some definitions to print/dump/show stacks diff --git a/arch/e2k/include/asm/kvm/gmmu_context.h b/arch/e2k/include/asm/kvm/gmmu_context.h index eaa0114..cf44760 100644 --- a/arch/e2k/include/asm/kvm/gmmu_context.h +++ b/arch/e2k/include/asm/kvm/gmmu_context.h @@ -75,7 +75,14 @@ kvm_mmu_set_init_gmm_root(struct kvm_vcpu *vcpu, hpa_t root) if (gmm == NULL) return; - KVM_BUG_ON(VALID_PAGE(gmm->root_hpa)); + spin_lock(&vcpu->kvm->mmu_lock); + if (likely(VALID_PAGE(gmm->root_hpa))) { + /* root has been already set */ + if (VALID_PAGE(root)) { + KVM_BUG_ON(root != gmm->root_hpa); + } + goto out_unlock; + } if (VALID_PAGE(root)) { gmm->root_hpa = root; } @@ -87,6 +94,10 @@ kvm_mmu_set_init_gmm_root(struct kvm_vcpu *vcpu, hpa_t root) gmm->u_pptb = vcpu->arch.mmu.get_vcpu_u_pptb(vcpu); gmm->os_pptb = vcpu->arch.mmu.get_vcpu_os_pptb(vcpu); gmm->u_vptb = vcpu->arch.mmu.get_vcpu_u_vptb(vcpu); + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + return; } static inline pgd_t * kvm_mmu_get_gmm_root(struct gmm_struct *gmm) @@ -104,7 +115,8 @@ kvm_mmu_load_the_gmm_root(struct kvm_vcpu *vcpu, gmm_struct_t *gmm) 
GTI_BUG_ON(vcpu == NULL); root = kvm_mmu_get_gmm_root(gmm); - GTI_BUG_ON(root == NULL); + if (unlikely(root == NULL)) + return NULL; if (unlikely(!u_space)) { if (unlikely(is_sep_virt_spaces(vcpu))) { @@ -206,20 +218,27 @@ switch_guest_pgd(pgd_t *next_pgd) 0, USER_PTRS_PER_PGD); } } else { - pgd_to_set = next_pgd; +#ifdef CONFIG_COPY_USER_PGD_TO_KERNEL_ROOT_PT + if (!MMU_IS_SEPARATE_PT() && THERE_IS_DUP_KERNEL) + pgd_to_set = NULL; + else +#endif /* CONFIG_COPY_USER_PGD_TO_KERNEL_ROOT_PT */ + pgd_to_set = next_pgd; } KVM_BUG_ON(PCSHTP_SIGN_EXTEND(NATIVE_READ_PCSHTP_REG_SVALUE()) != 0); - reload_root_pgd(pgd_to_set); - /* FIXME: support of guest secondary space is not yet implemented - reload_secondary_page_dir(mm); - */ + if (pgd_to_set != NULL) { + reload_root_pgd(pgd_to_set); + /* FIXME: support of guest secondary space is not yet implemented + reload_secondary_page_dir(mm); + */ + } } #define DO_NOT_USE_ACTIVE_GMM /* turn OFF optimization */ -static inline void +static inline gmm_struct_t * switch_guest_mm(gthread_info_t *next_gti, struct gmm_struct *next_gmm) { struct kvm_vcpu *vcpu = current_thread_info()->vcpu; @@ -261,17 +280,27 @@ switch_guest_mm(gthread_info_t *next_gti, struct gmm_struct *next_gmm) if (likely(!pv_vcpu_is_init_gmm(vcpu, next_gmm))) { next_pgd = kvm_mmu_load_gmm_root(current_thread_info(), next_gti); + if (unlikely(next_pgd == NULL)) { + next_gmm = pv_vcpu_get_init_gmm(vcpu); + goto to_init_root; + } pv_vcpu_set_gmm(vcpu, next_gmm); } else { +to_init_root: next_pgd = kvm_mmu_load_init_root(vcpu); pv_vcpu_clear_gmm(vcpu); } switch_guest_pgd(next_pgd); +#ifdef CONFIG_SMP + /* Stop flush ipis for the previous mm */ + if (likely(active_gmm != next_gmm)) + cpumask_clear_cpu(raw_smp_processor_id(), gmm_cpumask(active_gmm)); +#endif /* CONFIG_SMP */ pv_vcpu_set_active_gmm(vcpu, next_gmm); DebugKVMSW("task to switch is guest user thread, and its mm is not " "already active, so switch and make active mm %px #%d\n", next_gmm, next_gmm->nid.nr); - return; + return next_gmm; out: if (DEBUG_KVM_SWITCH_MODE) { /* any function call can fill old state of hardware stacks */ @@ -279,9 +308,10 @@ out: NATIVE_FLUSHCPU; E2K_WAIT(_all_e); } + return next_gmm; } -static inline void +static inline bool kvm_switch_to_init_guest_mm(struct kvm_vcpu *vcpu) { gthread_info_t *cur_gti = pv_vcpu_get_gti(vcpu); @@ -293,14 +323,19 @@ kvm_switch_to_init_guest_mm(struct kvm_vcpu *vcpu) active_gmm = pv_vcpu_get_active_gmm(vcpu); if (unlikely(init_gmm == active_gmm)) { /* already on init mm */ - return; + return false; } KVM_BUG_ON(cur_gti->gmm != active_gmm); root = kvm_mmu_load_the_gmm_root(vcpu, init_gmm); switch_guest_pgd(root); +#ifdef CONFIG_SMP + /* Stop flush ipis for the previous mm */ + cpumask_clear_cpu(raw_smp_processor_id(), gmm_cpumask(active_gmm)); +#endif /* CONFIG_SMP */ cur_gti->gmm_in_release = true; pv_vcpu_set_active_gmm(vcpu, init_gmm); pv_vcpu_clear_gmm(vcpu); + return true; } static inline void diff --git a/arch/e2k/include/asm/kvm/gpid.h b/arch/e2k/include/asm/kvm/gpid.h index c1d9b9b..b8eeb04 100644 --- a/arch/e2k/include/asm/kvm/gpid.h +++ b/arch/e2k/include/asm/kvm/gpid.h @@ -36,6 +36,7 @@ extern void kvm_free_gpid(gpid_t *gpid, kvm_gpid_table_t *gpid_table); extern int kvm_gpidmap_init(struct kvm *kvm, kvm_gpid_table_t *gpid_table, kvm_nidmap_t *gpid_nidmap, int gpidmap_entries, struct hlist_head *gpid_hash, int gpid_hash_bits); +extern void kvm_gpidmap_reset(struct kvm *kvm, kvm_gpid_table_t *gpid_table); extern void kvm_gpidmap_destroy(kvm_gpid_table_t *gpid_table); 
#define for_each_guest_thread_info(gpid, entry, next, gpid_table) \ diff --git a/arch/e2k/include/asm/kvm/gregs.h b/arch/e2k/include/asm/kvm/gregs.h index a32ba9f..4b09c01 100644 --- a/arch/e2k/include/asm/kvm/gregs.h +++ b/arch/e2k/include/asm/kvm/gregs.h @@ -131,41 +131,4 @@ /* not used */ #endif /* CONFIG_VIRTUALIZATION */ -static inline void -copy_h_gregs_to_gregs(global_regs_t *dst, const host_gregs_t *src) -{ - tagged_memcpy_8(&dst->g[HOST_GREGS_PAIRS_START], src->g, - sizeof(src->g)); -} - -static inline void -copy_h_gregs_to_h_gregs(host_gregs_t *dst, const host_gregs_t *src) -{ - tagged_memcpy_8(dst->g, src->g, sizeof(src->g)); -} - -static inline void -get_h_gregs_from_gregs(host_gregs_t *dst, const global_regs_t *src) -{ - tagged_memcpy_8(dst->g, &src->g[HOST_GREGS_PAIRS_START], - sizeof(dst->g)); -} - -static inline void -copy_h_gregs_to_l_gregs(local_gregs_t *dst, const host_gregs_t *src) -{ - BUG_ON(HOST_GREGS_PAIRS_START < LOCAL_GREGS_START); - tagged_memcpy_8(&dst->g[HOST_GREGS_PAIRS_START - LOCAL_GREGS_START], - src->g, sizeof(src->g)); -} - -static inline void -get_h_gregs_from_l_regs(host_gregs_t *dst, const local_gregs_t *src) -{ - BUG_ON(HOST_GREGS_PAIRS_START < LOCAL_GREGS_START); - tagged_memcpy_8(dst->g, - &src->g[HOST_GREGS_PAIRS_START - LOCAL_GREGS_START], - sizeof(dst->g)); -} - #endif /* _E2K_ASM_KVM_GREGS_H */ diff --git a/arch/e2k/include/asm/kvm/guest.h b/arch/e2k/include/asm/kvm/guest.h index 504ba1a..d9bfbe0 100644 --- a/arch/e2k/include/asm/kvm/guest.h +++ b/arch/e2k/include/asm/kvm/guest.h @@ -67,7 +67,9 @@ typedef struct kvm_cpu_regs { e2k_wd_t CPU_WD; /* Window Descriptor Register */ e2k_bgr_t CPU_BGR; /* Base Global Register */ e2k_lsr_t CPU_LSR; /* Loop Status Register */ + e2k_lsr_t CPU_LSR1; /* */ e2k_ilcr_t CPU_ILCR; /* Initial Loop Counters Register */ + e2k_ilcr_t CPU_ILCR1; /* */ e2k_rpr_lo_t CPU_RPR_lo; /* Recovery point register */ e2k_rpr_hi_t CPU_RPR_hi; e2k_cutd_t CPU_OSCUTD; /* CUTD Register of OS */ @@ -235,7 +237,6 @@ typedef struct kvm_host_info { unsigned long features; /* KVM and hypervisor features */ /* see details */ kvm_time_t time; /* current host time state */ - int clock_rate; /* clock tick frequency */ } kvm_host_info_t; /* diff --git a/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h b/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h index e616bcf..92807ca 100644 --- a/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h +++ b/arch/e2k/include/asm/kvm/guest/copy-hw-stacks.h @@ -11,6 +11,7 @@ #include #include +#include extern bool debug_ustacks; #undef DEBUG_USER_STACKS_MODE @@ -129,14 +130,18 @@ copy_stack_page_to_user(void __user *dst, void *src, e2k_size_t to_copy, ts_flag = set_ts_flag(TS_KERNEL_SYSCALL); do { npages = __get_user_pages_fast(addr, 1, 1, &page); - if (npages == 1) + if (likely(npages == 1)) break; npages = get_user_pages_unlocked(addr, 1, &page, FOLL_WRITE); - if (npages == 1) + if (likely(npages == 1)) { break; + } else if (npages < 0) { + ret = npages; + } else { + ret = -EFAULT; + } clear_ts_flag(ts_flag); set_fs(seg); - ret = -EFAULT; goto failed; } while (npages != 1); clear_ts_flag(ts_flag); @@ -217,7 +222,12 @@ kvm_copy_user_stack_from_kernel(void __user *dst, void *src, return 0; failed: - pr_err("%s(): failed, error %d\n", __func__, ret); + if (likely(ret == -ERESTARTSYS && fatal_signal_pending(current))) { + /* there is fatal signal to kill the process */ + ; + } else { + pr_err("%s(): failed, error %d\n", __func__, ret); + } return ret; } @@ -303,10 +313,17 @@ kvm_user_hw_stacks_copy(pt_regs_t *regs) } 
if (to_copy > 0) { ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, false); - if (ret != 0) { - pr_err("%s(): procedure stack copying from kernel %px " - "to user %px, size 0x%lx failed, error %d\n", - __func__, src, dst, to_copy, ret); + if (unlikely(ret != 0)) { + if (likely(ret == -ERESTARTSYS && + fatal_signal_pending(current))) { + /* there is fatal signal to kill the process */ + ; + } else { + pr_err("%s(): procedure stack copying from " + "kernel %px to user %px, size 0x%lx " + "failed, error %d\n", + __func__, src, dst, to_copy, ret); + } goto failed; } regs->copyed.ps_size = to_copy; @@ -347,10 +364,16 @@ kvm_user_hw_stacks_copy(pt_regs_t *regs) } if (to_copy > 0) { ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, true); - if (ret != 0) { - pr_err("%s(): chain stack copying from kernel %px " - "to user %px, size 0x%lx failed, error %d\n", - __func__, src, dst, to_copy, ret); + if (unlikely(ret != 0)) { + if (likely(ret == -ERESTARTSYS && + fatal_signal_pending(current))) { + /* there is fatal signal to kill the process */ + ; + } else { + pr_err("%s(): chain stack copying from kernel %px " + "to user %px, size 0x%lx failed, error %d\n", + __func__, src, dst, to_copy, ret); + } goto failed; } regs->copyed.pcs_size = to_copy; @@ -382,7 +405,7 @@ kvm_copy_injected_pcs_frames_to_user(pt_regs_t *regs, int frames_num) BUG_ON(irqs_disabled()); frames_size = frames_num * SZ_OF_CR; - copyed_frames_size = regs->copyed.pcs_injected_frames_size; + copyed_frames_size = regs->copyed.pcs_injected_frames_size; if (unlikely(copyed_frames_size >= frames_size)) { /* all frames have been already copyed */ return 0; @@ -402,8 +425,8 @@ kvm_copy_injected_pcs_frames_to_user(pt_regs_t *regs, int frames_num) "ind 0x%lx, pcsh top 0x%x\n", src, pcs_size, frames_size, pcs_ind, pcsh_top); BUG_ON(regs->copyed.pcs_size + frames_size > pcs_ind + pcsh_top); - if (stacks->pcsp_hi.PCSP_hi_ind + frames_size > - stacks->pcsp_hi.PCSP_hi_size) { + if (unlikely(stacks->pcsp_hi.PCSP_hi_ind + frames_size > + stacks->pcsp_hi.PCSP_hi_size)) { /* user chain stack can overflow, need expand */ ret = handle_chain_stack_bounds(stacks, regs->trap); if (unlikely(ret)) { @@ -430,10 +453,16 @@ kvm_copy_injected_pcs_frames_to_user(pt_regs_t *regs, int frames_num) } if (likely(to_copy > 0)) { ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, true); - if (ret != 0) { - pr_err("%s(): chain stack copying from kernel %px " - "to user %px, size 0x%lx failed, error %d\n", - __func__, src, dst, to_copy, ret); + if (unlikely(ret != 0)) { + if (likely(ret == -ERESTARTSYS && + fatal_signal_pending(current))) { + /* there is fatal signal to kill the process */ + ; + } else { + pr_err("%s(): chain stack copying from kernel %px " + "to user %px, size 0x%lx failed, error %d\n", + __func__, src, dst, to_copy, ret); + } goto failed; } regs->copyed.pcs_injected_frames_size = to_copy; @@ -503,9 +532,14 @@ static __always_inline int kvm_user_hw_stacks_prepare( * kvm_prepare_user_hv_stacks() */ ret = kvm_user_hw_stacks_copy(regs); - if (ret != 0) { - pr_err("%s(): copying of hardware stacks failed< error %d\n", - __func__, ret); + if (unlikely(ret != 0)) { + if (likely(ret == -ERESTARTSYS)) { + /* there is fatal signal to kill the process */ + ; + } else { + pr_err("%s(): copying of hardware stacks failed, error %d\n", + __func__, ret); + } do_exit(SIGKILL); } return ret; @@ -549,7 +583,7 @@ static __always_inline void host_user_hw_stacks_prepare( struct e2k_stacks *stacks, pt_regs_t *regs, u64 cur_window_q, enum restore_caller 
from, int syscall) { - if (regs->sys_num == __NR_e2k_longjmp2) { + if (unlikely(from_syscall(regs) && regs->sys_num == __NR_e2k_longjmp2)) { /* hardware stacks already are prepared */ return; } diff --git a/arch/e2k/include/asm/kvm/guest/debug.h b/arch/e2k/include/asm/kvm/guest/debug.h index fb1f729..578b242 100644 --- a/arch/e2k/include/asm/kvm/guest/debug.h +++ b/arch/e2k/include/asm/kvm/guest/debug.h @@ -48,6 +48,7 @@ print_guest_stack(struct task_struct *task, { /* nothing to do, guest has not other guest processes */ } + static inline void host_ftrace_stop(void) { diff --git a/arch/e2k/include/asm/kvm/guest/mmu.h b/arch/e2k/include/asm/kvm/guest/mmu.h index ac57c7b..f13b46c 100644 --- a/arch/e2k/include/asm/kvm/guest/mmu.h +++ b/arch/e2k/include/asm/kvm/guest/mmu.h @@ -37,14 +37,9 @@ static inline bool kvm_is_guest_kernel_gregs(struct thread_info *ti, unsigned greg_num_d, u64 **greg_copy) { - if (HOST_KERNEL_GREGS_PAIR_MASK == 0 || - !(HOST_KERNEL_GREGS_PAIR_MASK & (1UL << greg_num_d))) - /* register is not used by host and guest */ - /* to support virtualization */ - return false; - - *greg_copy = ti->h_gregs.g[greg_num_d - HOST_GREGS_PAIRS_START].xreg; - return true; + /* no additional register are used by host and guest */ + /* to support virtualization */ + return false; } #ifdef CONFIG_KVM_GUEST_KERNEL diff --git a/arch/e2k/include/asm/kvm/guest/pgatomic.h b/arch/e2k/include/asm/kvm/guest/pgatomic.h index 5c2aad9..ee55903 100644 --- a/arch/e2k/include/asm/kvm/guest/pgatomic.h +++ b/arch/e2k/include/asm/kvm/guest/pgatomic.h @@ -59,6 +59,18 @@ kvm_pt_get_and_xchg_atomic(struct mm_struct *mm, unsigned long addr, } } +static inline pgprotval_t +kvm_pt_get_and_xchg_relaxed(struct mm_struct *mm, unsigned long addr, + pgprotval_t newval, pgprot_t *pgprot) +{ + if (IS_HV_MMU_TDP()) { + return native_pt_get_and_xchg_relaxed(newval, &pgprot->pgprot); + } else { + return pgprot_val(kvm_pt_atomic_update(mm, addr, pgprot, + ATOMIC_GET_AND_XCHG, newval)); + } +} + static inline pgprotval_t kvm_pt_clear_relaxed_atomic(pgprotval_t prot_mask, pgprot_t *pgprot) { @@ -111,6 +123,13 @@ pt_get_and_xchg_atomic(struct mm_struct *mm, unsigned long addr, return kvm_pt_get_and_xchg_atomic(mm, addr, newval, pgprot); } +static inline pgprotval_t +pt_get_and_xchg_relaxed(struct mm_struct *mm, unsigned long addr, + pgprotval_t newval, pgprot_t *pgprot) +{ + return kvm_pt_get_and_xchg_relaxed(mm, addr, newval, pgprot); +} + static inline pgprotval_t pt_clear_relaxed_atomic(pgprotval_t mask, pgprot_t *pgprot) { diff --git a/arch/e2k/include/asm/kvm/guest/regs_state.h b/arch/e2k/include/asm/kvm/guest/regs_state.h index a39b1ae..b4fdfbd 100644 --- a/arch/e2k/include/asm/kvm/guest/regs_state.h +++ b/arch/e2k/include/asm/kvm/guest/regs_state.h @@ -57,9 +57,6 @@ guest_save_local_glob_regs_v2(local_gregs_t *l_gregs, bool is_signal) if (KERNEL_GREGS_MAX_MASK & LOCAL_GREGS_USER_MASK) copy_k_gregs_to_l_gregs(l_gregs, ¤t_thread_info()->k_gregs); - if (HOST_KERNEL_GREGS_MASK & LOCAL_GREGS_USER_MASK) - copy_h_gregs_to_l_gregs(l_gregs, - ¤t_thread_info()->h_gregs); } static inline void @@ -69,9 +66,6 @@ guest_save_local_glob_regs_v5(local_gregs_t *l_gregs, bool is_signal) if (KERNEL_GREGS_MAX_MASK & LOCAL_GREGS_USER_MASK) copy_k_gregs_to_l_gregs(l_gregs, ¤t_thread_info()->k_gregs); - if (HOST_KERNEL_GREGS_MASK & LOCAL_GREGS_USER_MASK) - copy_h_gregs_to_l_gregs(l_gregs, - ¤t_thread_info()->h_gregs); } static inline void @@ -93,9 +87,6 @@ guest_restore_local_glob_regs_v2(const local_gregs_t *l_gregs, bool is_signal) if 
(KERNEL_GREGS_MAX_MASK & LOCAL_GREGS_USER_MASK) get_k_gregs_from_l_regs(¤t_thread_info()->k_gregs, l_gregs); - if (HOST_KERNEL_GREGS_MASK & LOCAL_GREGS_USER_MASK) - get_h_gregs_from_l_regs(¤t_thread_info()->h_gregs, - l_gregs); } static inline void @@ -105,9 +96,6 @@ guest_restore_local_glob_regs_v5(const local_gregs_t *l_gregs, bool is_signal) if (KERNEL_GREGS_MAX_MASK & LOCAL_GREGS_USER_MASK) get_k_gregs_from_l_regs(¤t_thread_info()->k_gregs, l_gregs); - if (HOST_KERNEL_GREGS_MASK & LOCAL_GREGS_USER_MASK) - get_h_gregs_from_l_regs(¤t_thread_info()->h_gregs, - l_gregs); } static inline void @@ -115,7 +103,6 @@ guest_get_all_user_glob_regs(global_regs_t *gregs) { machine.save_gregs(gregs); copy_k_gregs_to_gregs(gregs, ¤t_thread_info()->k_gregs); - copy_h_gregs_to_gregs(gregs, ¤t_thread_info()->h_gregs); } #ifdef CONFIG_GREGS_CONTEXT @@ -126,8 +113,6 @@ guest_get_all_user_glob_regs(global_regs_t *gregs) KVM_SAVE_VCPU_STATE_BASE(vcpu_base); \ NATIVE_INIT_G_REGS(); \ KVM_RESTORE_VCPU_STATE_BASE(vcpu_base); \ - clear_memory_8(¤t_thread_info()->h_gregs, \ - sizeof(current_thread_info()->h_gregs), ETAGEWD); \ }) #define BOOT_KVM_INIT_G_REGS() \ ({ \ @@ -310,7 +295,7 @@ do { \ e2k_addr_t ktx = \ (e2k_addr_t)&(kernel_tcellar_ext[cnt].data); \ e2k_addr_t tx = \ - (e2k_addr_t)&(kernel_tcellar_ext[cnt].data); \ + (e2k_addr_t)&(tcellar[cnt].data_ext); \ kvm_move_tagged_dword(kt, t); \ if (is_qp) { \ kvm_move_tagged_dword(ktx, tx); \ diff --git a/arch/e2k/include/asm/kvm/guest/signal.h b/arch/e2k/include/asm/kvm/guest/signal.h index adcdba5..f25133a 100644 --- a/arch/e2k/include/asm/kvm/guest/signal.h +++ b/arch/e2k/include/asm/kvm/guest/signal.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ extern int kvm_signal_setup(struct pt_regs *regs); +extern int kvm_longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, + struct pt_regs *new_regs); extern int kvm_complete_long_jump(struct pt_regs *regs); #ifdef CONFIG_KVM_GUEST_KERNEL @@ -17,6 +19,12 @@ static inline int signal_setup(struct pt_regs *regs) return kvm_signal_setup(regs); } +static inline int longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, + struct pt_regs *new_regs) +{ + return kvm_longjmp_copy_user_to_kernel_hw_stacks(regs, new_regs); +} + static inline int complete_long_jump(struct pt_regs *regs) { if (likely(IS_HV_GM())) { diff --git a/arch/e2k/include/asm/kvm/guest/stacks.h b/arch/e2k/include/asm/kvm/guest/stacks.h index e4a516f..33dfea7 100644 --- a/arch/e2k/include/asm/kvm/guest/stacks.h +++ b/arch/e2k/include/asm/kvm/guest/stacks.h @@ -13,6 +13,7 @@ * Guest kernel thread stacks descriptions */ #define KVM_GUEST_KERNEL_C_STACK_SIZE KERNEL_C_STACK_SIZE /* as on host */ +#define KVM_GUEST_KERNEL_C_STACK_OFFSET KERNEL_C_STACK_OFFSET /* as on host */ #define KVM_GUEST_KERNEL_PS_SIZE (16 * PAGE_SIZE) /* 64 KBytes */ #define KVM_GUEST_KERNEL_PS_INIT_SIZE (1 * PAGE_SIZE) /* 4 KBytes */ #define KVM_GUEST_KERNEL_PCS_SIZE (2 * PAGE_SIZE) /* 8 KBytes */ diff --git a/arch/e2k/include/asm/kvm/guest/string.h b/arch/e2k/include/asm/kvm/guest/string.h index 7b34106..7f761be 100644 --- a/arch/e2k/include/asm/kvm/guest/string.h +++ b/arch/e2k/include/asm/kvm/guest/string.h @@ -9,6 +9,9 @@ #ifndef __ASSEMBLY__ +#define REPLACE_USR_PFAULT(to_pfault_IP) \ + (current_thread_info()->usr_pfault_jump = to_pfault_IP) + /* * optimized copy memory along with tags * using privileged LD/ST recovery operations @@ -38,7 +41,7 @@ kvm_do_fast_tagged_memory_set(void *addr, u64 val, u64 tag, len, strd_opcode); } else { ret = HYPERVISOR_fast_tagged_memory_set(addr, 
val, tag, len, - strd_opcode); + strd_opcode); } return ret; } @@ -107,31 +110,39 @@ extern unsigned long boot_kvm_fast_tagged_memory_set(void *addr, u64 val, extern unsigned long kvm_extract_tags_32(u16 *dst, const void *src); #endif /* ! DEBUG_GUEST_STRINGS */ +extern unsigned long kvm_fast_tagged_memory_copy_user(void *dst, const void *src, + size_t len, size_t *copied, + unsigned long strd_opcode, + unsigned long ldrd_opcode, + int prefetch); +extern unsigned long kvm_fast_tagged_memory_set_user(void *addr, u64 val, u64 tag, + size_t len, size_t *cleared, u64 strd_opcode); + static inline int kvm_fast_tagged_memory_copy_to_user(void __user *dst, const void *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { /* guest kernel does not support any nested guests */ - return kvm_fast_tagged_memory_copy(dst, src, len, + return kvm_fast_tagged_memory_copy_user(dst, src, len, copied, strd_opcode, ldrd_opcode, prefetch); } static inline int kvm_fast_tagged_memory_copy_from_user(void *dst, const void __user *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { /* guest kernel does not support any nested guests */ - return kvm_fast_tagged_memory_copy(dst, src, len, + return kvm_fast_tagged_memory_copy_user(dst, src, len, copied, strd_opcode, ldrd_opcode, prefetch); } static inline void kvm_tagged_memcpy_8(void *dst, const void *src, size_t n) { - E2K_PREFETCH_L2(src); + E2K_PREFETCH_L1_SPEC(src); __tagged_memcpy_8(dst, src, n); } @@ -161,6 +172,14 @@ fast_tagged_memory_copy(void *dst, const void *src, size_t len, ldrd_opcode, prefetch); } static inline unsigned long +fast_tagged_memory_copy_user(void *dst, const void *src, size_t len, size_t *copied, + unsigned long strd_opcode, unsigned long ldrd_opcode, + int prefetch) +{ + return kvm_fast_tagged_memory_copy_user(dst, src, len, copied, strd_opcode, + ldrd_opcode, prefetch); +} +static inline unsigned long boot_fast_tagged_memory_copy(void *dst, const void *src, size_t len, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) @@ -174,6 +193,13 @@ fast_tagged_memory_set(void *addr, u64 val, u64 tag, { return kvm_fast_tagged_memory_set(addr, val, tag, len, strd_opcode); } +static inline unsigned long +fast_tagged_memory_set_user(void *addr, u64 val, u64 tag, + size_t len, size_t *cleared, u64 strd_opcode) +{ + return kvm_fast_tagged_memory_set_user(addr, val, tag, len, cleared, + strd_opcode); +} static inline void boot_fast_tagged_memory_set(void *addr, u64 val, u64 tag, size_t len, u64 strd_opcode) @@ -189,21 +215,21 @@ extract_tags_32(u16 *dst, const void *src) static inline int fast_tagged_memory_copy_to_user(void __user *dst, const void *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { - return kvm_fast_tagged_memory_copy_to_user(dst, src, len, regs, + return kvm_fast_tagged_memory_copy_to_user(dst, src, len, copied, regs, strd_opcode, ldrd_opcode, prefetch); } static inline int fast_tagged_memory_copy_from_user(void *dst, const void __user *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { - return kvm_fast_tagged_memory_copy_from_user(dst, 
src, len, regs, + return kvm_fast_tagged_memory_copy_from_user(dst, src, len, copied, regs, strd_opcode, ldrd_opcode, prefetch); } diff --git a/arch/e2k/include/asm/kvm/guest/sync_pg_tables.h b/arch/e2k/include/asm/kvm/guest/sync_pg_tables.h index 36f1932..e4d546c 100644 --- a/arch/e2k/include/asm/kvm/guest/sync_pg_tables.h +++ b/arch/e2k/include/asm/kvm/guest/sync_pg_tables.h @@ -10,10 +10,27 @@ #include #include -static inline void kvm_sync_addr_range(e2k_addr_t start, e2k_addr_t end) +static inline void kvm_sync_mm_addr(e2k_addr_t addr) { - if (!IS_HV_GM()) - HYPERVISOR_sync_addr_range(start, end); + HYPERVISOR_sync_addr_range(addr, addr); } -#endif +static inline void kvm_sync_mm_range(e2k_addr_t start, e2k_addr_t end) +{ + HYPERVISOR_sync_addr_range(start, end); +} + +#ifdef CONFIG_KVM_GUEST_KERNEL +/* it is native guest kernel (not paravirtualized based on pv_ops) */ +static inline void sync_mm_addr(e2k_addr_t addr) +{ + kvm_sync_mm_addr(addr); +} + +static inline void sync_mm_range(e2k_addr_t start, e2k_addr_t end) +{ + kvm_sync_mm_range(start, end); +} +#endif /* CONFIG_KVM_GUEST_KERNEL */ + +#endif /* !_E2K_GST_SYNC_PG_TABLES_H */ diff --git a/arch/e2k/include/asm/kvm/guest/tlbflush.h b/arch/e2k/include/asm/kvm/guest/tlbflush.h index fc191e1..29ebc44 100644 --- a/arch/e2k/include/asm/kvm/guest/tlbflush.h +++ b/arch/e2k/include/asm/kvm/guest/tlbflush.h @@ -85,23 +85,25 @@ kvm_flush_tlb_range_and_pgtables(struct mm_struct *mm, kvm_pv_flush_tlb_range_and_pgtables(mm, start, end); } #else /* CONFIG_SMP */ -extern void native_smp_flush_tlb_all(void); -extern void native_smp_flush_tlb_mm(struct mm_struct *mm); -extern void native_smp_flush_tlb_page(struct vm_area_struct *vma, - e2k_addr_t addr); -extern void native_smp_flush_tlb_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); -extern void native_smp_flush_pmd_tlb_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); -extern void native_smp_flush_tlb_range_and_pgtables(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); +extern void kvm_pv_smp_flush_tlb_mm(struct mm_struct *const mm); +extern void kvm_pv_smp_flush_tlb_all(void); +extern void kvm_pv_smp_flush_tlb_page(struct vm_area_struct *const vma, + const e2k_addr_t addr); +extern void kvm_pv_smp_flush_tlb_range(struct mm_struct *const mm, + const e2k_addr_t start, const e2k_addr_t end); +extern void kvm_pv_smp_flush_pmd_tlb_range(struct mm_struct *const mm, + const e2k_addr_t start, const e2k_addr_t end); +extern void kvm_pv_smp_flush_tlb_range_and_pgtables(struct mm_struct *const mm, + const e2k_addr_t start, const e2k_addr_t end); +extern void kvm_pv_smp_flush_tlb_kernel_range(e2k_addr_t start, e2k_addr_t end); + static inline void kvm_flush_tlb_all(void) { if (IS_HV_GM()) native_smp_flush_tlb_all(); else - kvm_pv_flush_tlb_all(); + kvm_pv_smp_flush_tlb_all(); } static inline void kvm_flush_tlb_mm(struct mm_struct *mm) @@ -109,7 +111,7 @@ kvm_flush_tlb_mm(struct mm_struct *mm) if (IS_HV_GM()) native_smp_flush_tlb_mm(mm); else - kvm_pv_flush_tlb_mm(mm); + kvm_pv_smp_flush_tlb_mm(mm); } static inline void kvm_flush_tlb_page(struct vm_area_struct *vma, e2k_addr_t addr) @@ -117,7 +119,7 @@ kvm_flush_tlb_page(struct vm_area_struct *vma, e2k_addr_t addr) if (IS_HV_GM()) native_smp_flush_tlb_page(vma, addr); else - kvm_pv_flush_tlb_page(vma->vm_mm, addr); + kvm_pv_smp_flush_tlb_page(vma, addr); } static inline void kvm_flush_tlb_range(struct mm_struct *mm, e2k_addr_t start, e2k_addr_t end) @@ -125,7 +127,7 @@ kvm_flush_tlb_range(struct mm_struct *mm, 
e2k_addr_t start, e2k_addr_t end) if (IS_HV_GM()) native_smp_flush_tlb_range(mm, start, end); else - kvm_pv_flush_tlb_range(mm, start, end); + kvm_pv_smp_flush_tlb_range(mm, start, end); } static inline void kvm_flush_tlb_kernel_range(e2k_addr_t start, e2k_addr_t end) @@ -133,7 +135,7 @@ kvm_flush_tlb_kernel_range(e2k_addr_t start, e2k_addr_t end) if (IS_HV_GM()) native_smp_flush_tlb_all(); else - kvm_pv_flush_tlb_kernel_range(start, end); + kvm_pv_smp_flush_tlb_kernel_range(start, end); } static inline void kvm_flush_pmd_tlb_range(struct mm_struct *mm, e2k_addr_t start, @@ -142,7 +144,7 @@ kvm_flush_pmd_tlb_range(struct mm_struct *mm, e2k_addr_t start, if (IS_HV_GM()) native_smp_flush_pmd_tlb_range(mm, start, end); else - kvm_pv_flush_pmd_tlb_range(mm, start, end); + kvm_pv_smp_flush_pmd_tlb_range(mm, start, end); } static inline void kvm_flush_tlb_range_and_pgtables(struct mm_struct *mm, @@ -151,7 +153,7 @@ kvm_flush_tlb_range_and_pgtables(struct mm_struct *mm, if (IS_HV_GM()) native_smp_flush_tlb_range_and_pgtables(mm, start, end); else - kvm_pv_flush_tlb_range_and_pgtables(mm, start, end); + kvm_pv_smp_flush_tlb_range_and_pgtables(mm, start, end); } #endif /* CONFIG_SMP */ diff --git a/arch/e2k/include/asm/kvm/guest/trace-hw-stacks.h b/arch/e2k/include/asm/kvm/guest/trace-hw-stacks.h index d0567da..a917f8d 100644 --- a/arch/e2k/include/asm/kvm/guest/trace-hw-stacks.h +++ b/arch/e2k/include/asm/kvm/guest/trace-hw-stacks.h @@ -234,68 +234,7 @@ TRACE_EVENT( __entry->pcs_frame.cr1_hi.CR1_hi_half) ); -TRACE_EVENT( - guest_va_tlb_state, - - TP_PROTO(e2k_addr_t address), - - TP_ARGS(address), - - TP_STRUCT__entry( - __field( e2k_addr_t, address ) - __field( tlb_tag_t, set0_tag ) - __field_struct( pte_t, set0_entry ) - __field( tlb_tag_t, set1_tag ) - __field_struct( pte_t, set1_entry ) - __field( tlb_tag_t, set2_tag ) - __field_struct( pte_t, set2_entry ) - __field( tlb_tag_t, set3_tag ) - __field_struct( pte_t, set3_entry ) - __field( tlb_tag_t, setH_tag ) - __field_struct( pte_t, setH_entry ) - __field( u64, dtlb_entry ) - __field( unsigned long, mmu_pptb ) - __field( unsigned long, mmu_pid ) - ), - - TP_fast_assign( - __entry->address = address; - __entry->set0_tag = HYPERVISOR_get_tlb_set_tag(address, 0, false); - pte_val(__entry->set0_entry) = - HYPERVISOR_get_tlb_set_entry(address, 0, false); - __entry->set1_tag = HYPERVISOR_get_tlb_set_tag(address, 1, false); - pte_val(__entry->set1_entry) = - HYPERVISOR_get_tlb_set_entry(address, 1, false); - __entry->set2_tag = HYPERVISOR_get_tlb_set_tag(address, 2, false); - pte_val(__entry->set2_entry) = - HYPERVISOR_get_tlb_set_entry(address, 2, false); - __entry->set3_tag = HYPERVISOR_get_tlb_set_tag(address, 3, false); - pte_val(__entry->set3_entry) = - HYPERVISOR_get_tlb_set_entry(address, 3, false); - __entry->setH_tag = HYPERVISOR_get_tlb_set_tag(address, 3, true); - pte_val(__entry->setH_entry) = - HYPERVISOR_get_tlb_set_entry(address, 3, true); - __entry->dtlb_entry = HYPERVISOR_mmu_probe(address, - KVM_MMU_PROBE_ENTRY); - __entry->mmu_pptb = HYPERVISOR_get_host_mmu_pptb(); - __entry->mmu_pid = HYPERVISOR_get_host_mmu_pid(); - ), - - TP_printk(" 0x%016lx : dtlb 0x%016llx U_PPTB 0x%lx PID 0x%lx\n" - " TLB set #0 tag 0x%016lx entry 0x%016lx\n" - " TLB set #1 tag 0x%016lx entry 0x%016lx\n" - " TLB set #2 tag 0x%016lx entry 0x%016lx\n" - " TLB set #3 tag 0x%016lx entry 0x%016lx\n" - " TLB set #H tag 0x%016lx entry 0x%016lx", - __entry->address, __entry->dtlb_entry, - __entry->mmu_pptb, __entry->mmu_pid, - __entry->set0_tag, 
pte_val(__entry->set0_entry), - __entry->set1_tag, pte_val(__entry->set1_entry), - __entry->set2_tag, pte_val(__entry->set2_entry), - __entry->set3_tag, pte_val(__entry->set3_entry), - __entry->setH_tag, pte_val(__entry->setH_entry) - ) -); +#include #endif /* _KVM_GUEST_TRACE_COPY_HW_STACKS_H */ diff --git a/arch/e2k/include/asm/kvm/guest/trace-tlb-flush.h b/arch/e2k/include/asm/kvm/guest/trace-tlb-flush.h new file mode 100644 index 0000000..fd851f7 --- /dev/null +++ b/arch/e2k/include/asm/kvm/guest/trace-tlb-flush.h @@ -0,0 +1,103 @@ +#if !defined(_KVM_GUEST_TRACE_TLB_FLUSH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _KVM_GUEST_TRACE_TLB_FLUSH_H + +#include +#include + +#include +#include +#include +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM guest + +TRACE_EVENT( + guest_flush_tlb_range, + + TP_PROTO(struct mm_struct *mm, mmu_flush_tlb_op_t opc, + e2k_addr_t start, e2k_addr_t end), + + TP_ARGS(mm, opc, start, end), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, gmm_id) + __field(mmu_flush_tlb_op_t, opc) + __field(e2k_addr_t, start) + __field(e2k_addr_t, end) + ), + + TP_fast_assign( + __entry->vcpu_id = smp_processor_id(); + __entry->gmm_id = (mm != NULL) ? mm->gmmid_nr : -2; + __entry->opc = opc; + __entry->start = start; + __entry->end = end; + ), + + TP_printk("vcpu #%d gmm #%d flush TLB %s from %x to %px", + __entry->vcpu_id, __entry->gmm_id, + (__print_symbolic(__entry->opc, + { flush_all_tlb_op, "all" }, + { flush_mm_page_tlb_op, "page" }, + { flush_mm_range_tlb_op, "mm range" }, + { flush_mm_tlb_op, "mm" }, + { flush_pmd_range_tlb_op, "pmd range" }, + { flush_pt_range_tlb_op, "page tables" }, + { flush_kernel_range_tlb_op, "kernel range" })), + __entry->start, __entry->end + ) +); + +TRACE_EVENT( + guest_flush_tlb_failed, + + TP_PROTO(struct mm_struct *mm, mmu_flush_tlb_op_t opc, + e2k_addr_t start, e2k_addr_t end, int error), + + TP_ARGS(mm, opc, start, end, error), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, gmm_id) + __field(mmu_flush_tlb_op_t, opc) + __field(e2k_addr_t, start) + __field(e2k_addr_t, end) + __field(int, error) + ), + + TP_fast_assign( + __entry->vcpu_id = smp_processor_id(); + __entry->gmm_id = (mm != NULL) ?
mm->gmmid_nr : -2; + __entry->opc = opc; + __entry->start = start; + __entry->end = end; + __entry->error = error; + ), + + TP_printk("vcpu #%d gmm #%d flush TLB %s from %x to %px failed %d", + __entry->vcpu_id, __entry->gmm_id, + (__print_symbolic(__entry->opc, + { flush_all_tlb_op, "all" }, + { flush_mm_page_tlb_op, "page" }, + { flush_mm_range_tlb_op, "mm range" }, + { flush_mm_tlb_op, "mm" }, + { flush_pmd_range_tlb_op, "pmd range" }, + { flush_pt_range_tlb_op, "page tables" }, + { flush_kernel_range_tlb_op, "kernel range" })), + __entry->start, __entry->end, __entry->error + ) +); + +#endif /* _KVM_GUEST_TRACE_TLB_FLUSH_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/e2k/kvm/guest +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace-tlb-flush + +/* This part must be outside protection */ +#include diff --git a/arch/e2k/include/asm/kvm/guest/trace-tlb-state.h b/arch/e2k/include/asm/kvm/guest/trace-tlb-state.h new file mode 100644 index 0000000..3d08eed --- /dev/null +++ b/arch/e2k/include/asm/kvm/guest/trace-tlb-state.h @@ -0,0 +1,83 @@ +#if !defined(_KVM_GUEST_TRACE_TLB_STATE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _KVM_GUEST_TRACE_TLB_STATE_H + +#include +#include + +#include +#include +#include +#include + +TRACE_EVENT( + guest_va_tlb_state, + + TP_PROTO(e2k_addr_t address), + + TP_ARGS(address), + + TP_STRUCT__entry( + __field( e2k_addr_t, address ) + __field( tlb_tag_t, set0_tag ) + __field_struct( pte_t, set0_entry ) + __field( tlb_tag_t, set1_tag ) + __field_struct( pte_t, set1_entry ) + __field( tlb_tag_t, set2_tag ) + __field_struct( pte_t, set2_entry ) + __field( tlb_tag_t, set3_tag ) + __field_struct( pte_t, set3_entry ) + __field( tlb_tag_t, setH_tag ) + __field_struct( pte_t, setH_entry ) + __field( u64, dtlb_entry ) + __field( unsigned long, mmu_pptb ) + __field( unsigned long, mmu_pid ) + ), + + TP_fast_assign( + __entry->address = address; + __entry->set0_tag = HYPERVISOR_get_tlb_set_tag(address, 0, false); + pte_val(__entry->set0_entry) = + HYPERVISOR_get_tlb_set_entry(address, 0, false); + __entry->set1_tag = HYPERVISOR_get_tlb_set_tag(address, 1, false); + pte_val(__entry->set1_entry) = + HYPERVISOR_get_tlb_set_entry(address, 1, false); + __entry->set2_tag = HYPERVISOR_get_tlb_set_tag(address, 2, false); + pte_val(__entry->set2_entry) = + HYPERVISOR_get_tlb_set_entry(address, 2, false); + __entry->set3_tag = HYPERVISOR_get_tlb_set_tag(address, 3, false); + pte_val(__entry->set3_entry) = + HYPERVISOR_get_tlb_set_entry(address, 3, false); + __entry->setH_tag = HYPERVISOR_get_tlb_set_tag(address, 3, true); + pte_val(__entry->setH_entry) = + HYPERVISOR_get_tlb_set_entry(address, 3, true); + __entry->dtlb_entry = HYPERVISOR_mmu_probe(address, + KVM_MMU_PROBE_ENTRY); + __entry->mmu_pptb = HYPERVISOR_get_host_mmu_pptb(); + __entry->mmu_pid = HYPERVISOR_get_host_mmu_pid(); + ), + + TP_printk(" 0x%016lx : dtlb 0x%016llx U_PPTB 0x%lx PID 0x%lx\n" + " TLB set #0 tag 0x%016lx entry 0x%016lx\n" + " TLB set #1 tag 0x%016lx entry 0x%016lx\n" + " TLB set #2 tag 0x%016lx entry 0x%016lx\n" + " TLB set #3 tag 0x%016lx entry 0x%016lx\n" + " TLB set #H tag 0x%016lx entry 0x%016lx", + __entry->address, __entry->dtlb_entry, + __entry->mmu_pptb, __entry->mmu_pid, + __entry->set0_tag, pte_val(__entry->set0_entry), + __entry->set1_tag, pte_val(__entry->set1_entry), + __entry->set2_tag, pte_val(__entry->set2_entry), + __entry->set3_tag, pte_val(__entry->set3_entry), + __entry->setH_tag, pte_val(__entry->setH_entry) + ) +); + +#endif /* 
_KVM_GUEST_TRACE_TLB_STATE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/e2k/include/asm/kvm/guest +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace-tlb-state + +/* This part must be outside protection */ +#include diff --git a/arch/e2k/include/asm/kvm/guest/trap_table.S.h b/arch/e2k/include/asm/kvm/guest/trap_table.S.h index 5b2281e..fd5a391 100644 --- a/arch/e2k/include/asm/kvm/guest/trap_table.S.h +++ b/arch/e2k/include/asm/kvm/guest/trap_table.S.h @@ -53,26 +53,17 @@ } .endm /* NEED_SAVE_CUR_AND_VCPU_STATE_GREGS */ -/* guest VCPU state registers are saved at thread_info->h_gregs */ -/* same as by host for paravirtualized guest */ +/* guest VCPU state registers are saved with other kernel global registers */ +/* at thread_info->k_gregs, same as by host for paravirtualized guest */ .macro DO_SAVE_HOST_GREGS_V2 gvcpu_lo, gvcpu_hi, hvcpu_lo, hvcpu_hi \ drti, predSAVE, drtmp, rtmp0, rtmp1 - /* drtmp: thread_info->h_gregs.g */ - addd \drti, TI_HOST_GREGS_TO_VIRT, \drtmp ? \predSAVE; - SAVE_GREGS_PAIR_COND_V2 \gvcpu_lo, \gvcpu_hi, \hvcpu_lo, \hvcpu_hi, \ - \drtmp, /* thread_info->h_gregs.g base address */ \ - \predSAVE, \ - \rtmp0, \rtmp1 + /* not used */ .endm /* DO_SAVE_HOST_GREGS_V2 */ .macro DO_SAVE_HOST_GREGS_V5 gvcpu_lo, gvcpu_hi, hvcpu_lo, hvcpu_hi \ drti, predSAVE, drtmp - /* drtmp: thread_info->h_gregs.g */ - addd \drti, TI_HOST_GREGS_TO_VIRT, \drtmp ? \predSAVE; - SAVE_GREGS_PAIR_COND_V5 \gvcpu_lo, \gvcpu_hi, \hvcpu_lo, \hvcpu_hi, \ - \drtmp, /* thread_info->h_gregs.g base address */ \ - \predSAVE + /* not used */ .endm /* DO_SAVE_HOST_GREGS_V5 */ .macro SAVE_HOST_GREGS_V2 drti, predSAVE, drtmp, rtmp0, rtmp1 @@ -92,24 +83,30 @@ .endm /* SAVE_HOST_GREGS_V5 */ .macro SAVE_HOST_GREGS_UNEXT gvcpu, hvcpu, drti, drtmp - /* drtmp: thread_info->h_gregs.g */ - addd \drti, TI_HOST_GREGS_TO_VIRT, \drtmp; - SAVE_GREG_UNEXT \gvcpu, \hvcpu, \drtmp + /* not used */ .endm /* SAVE_HOST_GREGS_UNEXT */ .global vcpus_state; -.macro SET_VCPU_STATE_GREGS drti, predSAVE, drtmp - ldw [ \drti + TSK_TI_CPU_DELTA ], \drtmp ? \predSAVE /* VCPU # */ - shld \drtmp, 3, \drtmp ? \predSAVE - ldd [ \drtmp + vcpus_state ], GVCPUSTATE ? \predSAVE -.endm /* SET_VCPU_STATE_GREGS */ - +#ifdef CONFIG_SMP .macro SET_VCPU_STATE_GREGS_UNCOND drti, drtmp ldw [ \drti + TSK_TI_CPU_DELTA ], \drtmp /* VCPU # */ shld \drtmp, 3, \drtmp ldd [ \drtmp + vcpus_state ], GVCPUSTATE .endm /* SET_VCPU_STATE_GREGS */ +.macro SET_VCPU_STATE_GREGS drti, predSAVE, drtmp + ldw [ \drti + TSK_TI_CPU_DELTA ], \drtmp ? \predSAVE /* VCPU # */ + shld \drtmp, 3, \drtmp ? \predSAVE + ldd [ \drtmp + vcpus_state ], GVCPUSTATE ? \predSAVE +.endm /* SET_VCPU_STATE_GREGS */ +#else +.macro SET_VCPU_STATE_GREGS_UNCOND drti, drtmp + ldd [ 0 + vcpus_state ], GVCPUSTATE +.endm /* SET_VCPU_STATE_GREGS */ +.macro SET_VCPU_STATE_GREGS drti, predSAVE, drtmp + ldd [ 0 + vcpus_state ], GVCPUSTATE ? 
\predSAVE +.endm /* SET_VCPU_STATE_GREGS */ +#endif .macro SAVE_HOST_GREGS_TO_VIRT_V2 drti, predSAVE, drtmp, rtmp0, rtmp1 SAVE_HOST_GREGS_V2 \drti, \predSAVE, \drtmp, \rtmp0, \rtmp1 diff --git a/arch/e2k/include/asm/kvm/guest/trap_table.h b/arch/e2k/include/asm/kvm/guest/trap_table.h index a5198b0..bc13495 100644 --- a/arch/e2k/include/asm/kvm/guest/trap_table.h +++ b/arch/e2k/include/asm/kvm/guest/trap_table.h @@ -54,6 +54,8 @@ static inline void kvm_clear_fork_child_pt_regs(struct pt_regs *childregs) kvm_init_pt_regs_copyed_fields(childregs); } +#define kvm_restore_some_values_after_fill(__regs, __from, __return_to_user) + #define KVM_FILL_HARDWARE_STACKS() /* host itself will fill */ extern void kvm_correct_trap_psp_pcsp(struct pt_regs *regs, @@ -171,6 +173,9 @@ do { \ } \ } while (false) +#define restore_some_values_after_fill(__regs, __from, __return_to_user) \ + kvm_restore_some_values_after_fill(__regs, __from, __return_to_user) + static inline void exit_handle_syscall(e2k_addr_t sbr, e2k_usd_hi_t usd_hi, e2k_usd_lo_t usd_lo, e2k_upsr_t upsr) diff --git a/arch/e2k/include/asm/kvm/head.h b/arch/e2k/include/asm/kvm/head.h index 61ef5ba..28db035 100644 --- a/arch/e2k/include/asm/kvm/head.h +++ b/arch/e2k/include/asm/kvm/head.h @@ -50,6 +50,10 @@ #ifdef CONFIG_VIRTUALIZATION +#ifndef __ASSEMBLY__ +#include +#endif /* !__ASSEMBLY__ */ + #define HOST_KERNEL_PHYS_MEM_VIRT_BASE HOST_PAGE_OFFSET /* 0x0000c000 ... */ #define GUEST_KERNEL_PHYS_MEM_VIRT_BASE GUEST_PAGE_OFFSET /* 0x00002000 ... */ #define GUEST_IO_PORTS_VIRT_BASE 0x00003f7e7e000000UL @@ -84,7 +88,8 @@ /* Macros defines VRAM for one VCPU or VIRQ VCPU, */ /* but VRAM should be created for all VCPU and VIRQ VCPU */ -#define GUEST_ONE_VCPU_VRAM_SIZE (4 *4096) /* 4 pages */ +#define GUEST_ONE_VCPU_VRAM_SIZE sizeof(kvm_vcpu_state_t) +#define HOST_INFO_VCPU_VRAM_SIZE sizeof(kvm_host_info_t) #define GUEST_VCPU_VRAM_PHYS_BASE 0x000000ff00000000UL #define GUEST_MAX_VCPU_VRAM_SIZE 0x0000000001000000UL #define GUEST_VCPU_VRAM_VIRT_BASE \ diff --git a/arch/e2k/include/asm/kvm/hypercall.h b/arch/e2k/include/asm/kvm/hypercall.h index 0415119..5475f86 100644 --- a/arch/e2k/include/asm/kvm/hypercall.h +++ b/arch/e2k/include/asm/kvm/hypercall.h @@ -38,7 +38,6 @@ #include #include -#include #include #include @@ -619,6 +618,10 @@ HYPERVISOR_switch_to_expanded_guest_chain_stack(long delta_size, #define KVM_HCALL_FTRACE_DUMP 123 /* dump host's ftrace buffer */ #define KVM_HCALL_DUMP_COMPLETION 125 /* show state or dump all */ /* stacks is completed */ +#define KVM_HCALL_FAST_TAGGED_MEMORY_COPY_USER 127 /* fast tagged memory copy */ + /* to/from user */ +#define KVM_HCALL_FAST_TAGGED_MEMORY_SET_USER 128 /* fast tagged memory set */ + /* at user */ #define KVM_HCALL_HOST_PRINTK 130 /* guest printk() on host */ #define KVM_HCALL_PRINT_GUEST_KERNEL_PTES 131 /* dump guest kernel address */ @@ -629,10 +632,10 @@ HYPERVISOR_switch_to_expanded_guest_chain_stack(long delta_size, #define KVM_HCALL_PV_ENABLE_ASYNC_PF 133 /* enable async pf */ /* on current vcpu */ #endif /* CONFIG_KVM_ASYNC_PF */ -#define KVM_HCALL_FLUSH_TLB_RANGE 134 /* sync given address range */ - /* in page tables and flush tlb */ -#define KVM_HCALL_SYNC_ADDR_RANGE 135 /* sync ptes in page */ - /* tables without flushing tlb */ +#define KVM_HCALL_MMU_PV_FLUSH_TLB 134 /* sync host's shadow PTs */ + /* and flush tlb */ +#define KVM_HCALL_SYNC_ADDR_RANGE 135 /* sync host's shadow PTs */ + /* without flushing tlb */ #define KVM_HCALL_GET_SPT_TRANSLATION 137 /* get full translation of guest */ 
/* address at shadow PTs */ #define KVM_HCALL_RECOVERY_FAULTED_TAGGED_STORE 141 @@ -1392,7 +1395,9 @@ extern void smp_send_refresh(void); static inline unsigned long HYPERVISOR_kvm_shutdown(void *msg, unsigned long reason) { +#ifdef CONFIG_SMP smp_send_refresh(); +#endif return generic_hypercall2(KVM_HCALL_SHUTDOWN, (unsigned long)msg, reason); } @@ -1488,12 +1493,33 @@ HYPERVISOR_fast_tagged_memory_copy(void *dst, const void *src, size_t len, len, strd_opcode, ldrd_opcode, prefetch); } static inline unsigned long +HYPERVISOR_fast_tagged_memory_copy_user(void *dst, const void *src, + size_t len, size_t *copied, + unsigned long strd_opcode, unsigned long ldrd_opcode, + int prefetch) +{ + return generic_hypercall6(KVM_HCALL_FAST_TAGGED_MEMORY_COPY_USER, + (unsigned long)dst, (unsigned long)src, + len, (unsigned long)copied, + strd_opcode | + LDST_PREFETCH_FLAG_SET((unsigned long)!!prefetch), + ldrd_opcode); +} +static inline unsigned long HYPERVISOR_fast_tagged_memory_set(void *addr, u64 val, u64 tag, size_t len, u64 strd_opcode) { return generic_hypercall5(KVM_HCALL_FAST_TAGGED_MEMORY_SET, (unsigned long)addr, val, tag, len, strd_opcode); } +static inline unsigned long +HYPERVISOR_fast_tagged_memory_set_user(void *addr, u64 val, u64 tag, + size_t len, size_t *cleared, u64 strd_opcode) +{ + return generic_hypercall6(KVM_HCALL_FAST_TAGGED_MEMORY_SET_USER, + (unsigned long)addr, val, tag, len, + (unsigned long)cleared, strd_opcode); +} #ifdef CONFIG_KVM_ASYNC_PF static inline int HYPERVISOR_pv_enable_async_pf(u64 apf_reason_gpa, u64 apf_id_gpa, u32 apf_ready_vector, u32 irq_controller) @@ -1503,11 +1529,34 @@ static inline int HYPERVISOR_pv_enable_async_pf(u64 apf_reason_gpa, apf_ready_vector, irq_controller); } #endif /* CONFIG_KVM_ASYNC_PF */ + +/* + * The structure to flush guest virtual space at the host shadow PTs + */ + +typedef enum mmu_flush_tlb_op { + undefined_tlb_op = 0, /* undefined type of flush */ + flush_all_tlb_op, /* flush all TLB */ + flush_mm_page_tlb_op, /* flush a single page from TLB */ + flush_mm_range_tlb_op, /* flush a range of pages */ + flush_mm_tlb_op, /* flush a specified user mapping */ + flush_pmd_range_tlb_op, /* same as a range of pages, but for pmd's */ + flush_pt_range_tlb_op, /* flush a range of pages and page tables */ + flush_kernel_range_tlb_op, /* flush a kernel range of pages */ +} mmu_flush_tlb_op_t; + +typedef struct mmu_spt_flush { + mmu_flush_tlb_op_t opc; /* flush type (see above) */ + int gmm_id; /* gmm ID */ + unsigned long start; /* affress or start of range */ + unsigned long end; /* end address of range */ +} mmu_spt_flush_t; + static inline unsigned long -HYPERVISOR_flush_tlb_range(e2k_addr_t start_gva, e2k_addr_t end_gva) +HYPERVISOR_mmu_pv_flush_tlb(mmu_spt_flush_t *flush_info) { - return generic_hypercall2(KVM_HCALL_FLUSH_TLB_RANGE, - start_gva, end_gva); + return generic_hypercall1(KVM_HCALL_MMU_PV_FLUSH_TLB, + (unsigned long)flush_info); } static inline void HYPERVISOR_sync_addr_range(e2k_addr_t start_gva, e2k_addr_t end_gva) diff --git a/arch/e2k/include/asm/kvm/machdep.h b/arch/e2k/include/asm/kvm/machdep.h index 0c8584d..6fb3cdb 100644 --- a/arch/e2k/include/asm/kvm/machdep.h +++ b/arch/e2k/include/asm/kvm/machdep.h @@ -7,7 +7,6 @@ typedef struct global_regs global_regs_t; typedef struct kernel_gregs kernel_gregs_t; -typedef struct host_gregs host_gregs_t; #ifndef CONFIG_VIRTUALIZATION /* it is native kernel without any virtualization support */ @@ -18,10 +17,6 @@ typedef struct guest_machdep { /* none any guest */ } guest_machdep_t; 
#else /* CONFIG_VIRTUALIZATION */ -extern void kvm_save_host_gregs_v2(struct host_gregs *gregs); -extern void kvm_save_host_gregs_v5(struct host_gregs *gregs); -extern void kvm_restore_host_gregs_v5(const struct host_gregs *gregs); - extern void kvm_guest_save_local_gregs_v2(struct local_gregs *gregs, bool is_signal); extern void kvm_guest_save_local_gregs_v5(struct local_gregs *gregs, diff --git a/arch/e2k/include/asm/kvm/mm.h b/arch/e2k/include/asm/kvm/mm.h index c1c2313..29a004d 100644 --- a/arch/e2k/include/asm/kvm/mm.h +++ b/arch/e2k/include/asm/kvm/mm.h @@ -55,7 +55,19 @@ typedef struct gmm_struct { } gmm_struct_t; /* same as accessor for struct mm_struct's cpu_vm_mask but for guest mm */ -#define gmm_cpumask(gmm) (&(gmm)->cpu_vm_mask) +static inline void gmm_init_cpumask(gmm_struct_t *gmm) +{ + unsigned long cpu_vm_mask = (unsigned long)gmm; + + cpu_vm_mask += offsetof(gmm_struct_t, cpu_vm_mask); + cpumask_clear((struct cpumask *)cpu_vm_mask); +} + +/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ +static inline cpumask_t *gmm_cpumask(gmm_struct_t *gmm) +{ + return (struct cpumask *)&gmm->cpu_vm_mask; +} typedef struct kvm_nid_table gmmid_table_t; @@ -66,7 +78,10 @@ struct kvm; extern int kvm_guest_mm_drop(struct kvm_vcpu *vcpu, int gmmid_nr); extern int kvm_activate_guest_mm(struct kvm_vcpu *vcpu, int active_gmmid_nr, int gmmid_nr, gpa_t u_phys_ptb); +extern int kvm_pv_init_gmm_create(struct kvm *kvm); extern int kvm_guest_pv_mm_init(struct kvm *kvm); +extern void kvm_guest_pv_mm_reset(struct kvm *kvm); +extern void kvm_guest_pv_mm_free(struct kvm *kvm); extern void kvm_guest_pv_mm_destroy(struct kvm *kvm); #define for_each_guest_mm(gmm, entry, next, gmmid_table) \ @@ -75,6 +90,8 @@ extern void kvm_guest_pv_mm_destroy(struct kvm *kvm); #define gmmid_entry(ptr) container_of(ptr, gmm_struct_t, nid) #define gmmid_table_lock(gmmid_table) \ nid_table_lock(gmmid_table) +#define gmmid_table_trylock(gmmid_table) \ + nid_table_trylock(gmmid_table) #define gmmid_table_unlock(gmmid_table) \ nid_table_unlock(gmmid_table) #define gmmid_table_lock_irq(gmmid_table) \ @@ -91,7 +108,7 @@ kvm_find_gmmid(gmmid_table_t *gmmid_table, int gmmid_nr) { kvm_nid_t *nid; - nid = kvm_find_nid(gmmid_table, gmmid_nr, gmmid_hashfn(gmmid_nr)); + nid = kvm_try_find_nid(gmmid_table, gmmid_nr, gmmid_hashfn(gmmid_nr)); if (nid == NULL) return NULL; return gmmid_entry(nid); diff --git a/arch/e2k/include/asm/kvm/mmu.h b/arch/e2k/include/asm/kvm/mmu.h index a7c714f..18f0820 100644 --- a/arch/e2k/include/asm/kvm/mmu.h +++ b/arch/e2k/include/asm/kvm/mmu.h @@ -93,6 +93,10 @@ static inline bool is_spt_paging(struct kvm_vcpu *vcpu) } static inline bool is_hv_paging(struct kvm_vcpu *vcpu) { +#ifdef CONFIG_VIRTUALIZATION + if (current_thread_info()->vcpu != vcpu) + return is_paging_flag(vcpu); +#endif if (vcpu->arch.mmu.is_paging == NULL) return is_paging_flag(vcpu); diff --git a/arch/e2k/include/asm/kvm/mmu_regs_access.h b/arch/e2k/include/asm/kvm/mmu_regs_access.h index 3b78f6d..f4f5678 100644 --- a/arch/e2k/include/asm/kvm/mmu_regs_access.h +++ b/arch/e2k/include/asm/kvm/mmu_regs_access.h @@ -245,15 +245,13 @@ static inline mmu_reg_t KVM_READ_DTLB_REG(tlb_addr_t tlb_addr) static inline void KVM_FLUSH_TLB_ENTRY(flush_op_t flush_op, flush_addr_t flush_addr) { - if (IS_HV_GM()) { - /* FIXME: guest should fully control own PTs including */ - /* all hardware MMU registers, but it is not so now, */ - /* for example PT roots and context registers are controled */ - /* by hypervisor as for paravirtualized kernels */ - 
native_flush_TLB_all(); - } else if (IS_ENABLED(CONFIG_KVM_PARAVIRT_TLB_FLUSH)) { - HYPERVISOR_flush_tlb_range(flush_addr_get_va(flush_addr), - flush_addr_get_va(flush_addr)); + if (unlikely(flush_addr_get_pid(flush_addr) == E2K_KERNEL_CONTEXT)) { + pr_warn("%s(): CPU #%d try to flush %s addr 0x%lx pid 0x%03lx\n", + __func__, smp_processor_id(), + (flush_op_get_type(flush_op) == flush_op_tlb_page_sys) ? + "TLB page" : "???", + flush_addr_get_va(flush_addr), + flush_addr_get_pid(flush_addr)); } } @@ -351,11 +349,8 @@ KVM_FLUSH_CACHE_L12(flush_op_t flush_op) static inline void KVM_FLUSH_TLB_ALL(flush_op_t flush_op) { - if (IS_HV_GM()) { - native_flush_TLB_all(); - } else if (IS_ENABLED(CONFIG_KVM_PARAVIRT_TLB_FLUSH)) { - HYPERVISOR_flush_tlb_range(0, E2K_VA_SIZE); - } + pr_warn_once("%s(): try to flush all TLB : op 0x%lx\n", + __func__, flush_op); } /* diff --git a/arch/e2k/include/asm/kvm/nid.h b/arch/e2k/include/asm/kvm/nid.h index 9b9a880..9ca63f0 100644 --- a/arch/e2k/include/asm/kvm/nid.h +++ b/arch/e2k/include/asm/kvm/nid.h @@ -45,8 +45,24 @@ extern void kvm_do_free_nid(kvm_nid_t *nid, struct kvm_nid_table *nid_table); extern void kvm_free_nid(kvm_nid_t *nid, struct kvm_nid_table *nid_table); extern int kvm_nidmap_init(struct kvm_nid_table *nid_table, int nid_max_limit, int reserved_nids, int last_nid); +extern void kvm_nidmap_reset(struct kvm_nid_table *nid_table, int last_nid); extern void kvm_nidmap_destroy(struct kvm_nid_table *nid_table); +static inline kvm_nid_t * +kvm_do_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) +{ + kvm_nid_t *nid; + + hlist_for_each_entry(nid, + &(nid_table->nid_hash[hash_index]), + nid_chain) { + if (nid->nr == nid_nr) { + return nid; + } + } + return NULL; +} + static inline kvm_nid_t * kvm_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) { @@ -54,17 +70,24 @@ kvm_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) unsigned long flags; raw_spin_lock_irqsave(&nid_table->nidmap_lock, flags); - hlist_for_each_entry(nid, - &(nid_table->nid_hash[hash_index]), - nid_chain) { - if (nid->nr == nid_nr) { - raw_spin_unlock_irqrestore(&nid_table->nidmap_lock, - flags); - return nid; - } - } + nid = kvm_do_find_nid(nid_table, nid_nr, hash_index); raw_spin_unlock_irqrestore(&nid_table->nidmap_lock, flags); - return NULL; + return nid; +} + +static inline kvm_nid_t * +kvm_try_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) +{ + kvm_nid_t *nid; + unsigned long flags; + bool locked; + + locked = raw_spin_trylock_irqsave(&nid_table->nidmap_lock, flags); + nid = kvm_do_find_nid(nid_table, nid_nr, hash_index); + if (likely(locked)) { + raw_spin_unlock_irqrestore(&nid_table->nidmap_lock, flags); + } + return nid; } #define for_each_guest_nid_node(node, entry, next, nid_table, \ @@ -75,6 +98,8 @@ kvm_find_nid(struct kvm_nid_table *nid_table, int nid_nr, int hash_index) nid_hlist_member) #define nid_table_lock(nid_table) \ raw_spin_lock(&(nid_table)->nidmap_lock) +#define nid_table_trylock(nid_table) \ + raw_spin_trylock(&(nid_table)->nidmap_lock) #define nid_table_unlock(nid_table) \ raw_spin_unlock(&(nid_table)->nidmap_lock) #define nid_table_lock_irq(nid_table) \ diff --git a/arch/e2k/include/asm/kvm/page_track.h b/arch/e2k/include/asm/kvm/page_track.h index 8fd16ef..ee0bc9e 100644 --- a/arch/e2k/include/asm/kvm/page_track.h +++ b/arch/e2k/include/asm/kvm/page_track.h @@ -1,12 +1,22 @@ #ifndef _ASM_E2K_KVM_PAGE_TRACK_H #define _ASM_E2K_KVM_PAGE_TRACK_H -#ifdef CONFIG_KVM_HV_MMU enum 
kvm_page_track_mode { KVM_PAGE_TRACK_WRITE, KVM_PAGE_TRACK_MAX, }; +/* + * @flags argument of track_write() function to clarify the possible + * reason for writing at protected area + */ +#define THP_INVALIDATE_WR_TRACK 0x0001UL /* to invalidate PT huge */ + /* entry at THP mode */ +#define NUMA_BALANCING_WR_TRACK 0x0010UL /* to migrate from one */ + /* NUMA node to other */ + +#ifdef CONFIG_KVM_HV_MMU + /* * The notifier represented by @kvm_page_track_notifier_node is linked into * the head which will be notified when guest is triggering the track event. @@ -32,7 +42,7 @@ struct kvm_page_track_notifier_node { * @bytes: the written length. */ void (*track_write)(struct kvm_vcpu *vcpu, struct gmm_struct *gmm, - gpa_t gpa, const u8 *new, int bytes); + gpa_t gpa, const u8 *new, int bytes, unsigned long flags); /* * It is called when memory slot is being moved or removed * users can drop write-protection for the pages in that memory slot @@ -69,7 +79,7 @@ void kvm_page_track_unregister_notifier(struct kvm *kvm, struct kvm_page_track_notifier_node *n); void kvm_page_track_write(struct kvm_vcpu *vcpu, struct gmm_struct *gmm, - gpa_t gpa, const u8 *new, int bytes); + gpa_t gpa, const u8 *new, int bytes, unsigned long flags); void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot); #else /* ! CONFIG_KVM_HV_MMU */ static inline void kvm_page_track_init(struct kvm *kvm) diff --git a/arch/e2k/include/asm/kvm/process.h b/arch/e2k/include/asm/kvm/process.h index 2c52152..ed09d9d 100644 --- a/arch/e2k/include/asm/kvm/process.h +++ b/arch/e2k/include/asm/kvm/process.h @@ -16,12 +16,11 @@ #include extern void kvm_clear_host_thread_info(thread_info_t *ti); -extern gthread_info_t *create_guest_start_thread_info(struct kvm_vcpu *vcpu); extern int kvm_resume_vm_thread(void); extern int kvm_correct_guest_trap_return_ip(unsigned long return_ip); -extern long return_pv_vcpu_syscall_fork(void); +extern long return_pv_vcpu_syscall_fork(u64 sys_rval); /* * Is the CPU at guest Hardware Virtualized mode @@ -235,16 +234,7 @@ host_exit_to_usermode_loop(struct pt_regs *regs, bool syscall, bool has_signal) schedule(); } - if (has_signal) { - /* - * This is guest VCPU interception emulation, but - * there is (are) pending signal for host VCPU mode, - * so it need switch to host VCPU mode to handle - * signal and probably to kill VM - */ - WRITE_PSR_IRQ_BARRIER(AW(E2K_KERNEL_PSR_DISABLED)); - pv_vcpu_switch_to_host_from_intc(current_thread_info()); - } else if (likely(guest_trap_pending(current_thread_info()))) { + if (likely(guest_trap_pending(current_thread_info()))) { /* * This is guest VCPU interception emulation and * there is (are) the guest trap(s) to handle @@ -266,6 +256,16 @@ host_exit_to_usermode_loop(struct pt_regs *regs, bool syscall, bool has_signal) } WRITE_PSR_IRQ_BARRIER(AW(E2K_KERNEL_PSR_DISABLED)); + + if (has_signal) { + /* + * This is guest VCPU interception emulation, but + * there is (are) pending signal for host VCPU mode, + * so it needs to switch to host VCPU mode to handle + * signal and probably to kill VM + */ + pv_vcpu_switch_to_host_from_intc(current_thread_info()); + } } #ifdef CONFIG_SMP @@ -355,7 +355,7 @@ host_exit_to_usermode_loop(struct pt_regs *regs, bool syscall, bool has_signal) #define RESTORE_GUEST_KERNEL_GREGS_COPY(__ti, __gti, __vcpu) \ ({ \ kernel_gregs_t *k_gregs = &(__ti)->k_gregs; \ - kernel_gregs_t *g_gregs = &(__gti)->gu_gregs; \ + kernel_gregs_t *g_gregs = &(__gti)->gk_gregs; \ \ RESTORE_GUEST_KERNEL_GREGS_COPY_FROM(k_gregs, g_gregs, true); \
INIT_HOST_VCPU_STATE_GREG_COPY(__ti, __vcpu); \ diff --git a/arch/e2k/include/asm/kvm/ptrace.h b/arch/e2k/include/asm/kvm/ptrace.h index 6471832..8ceb53a 100644 --- a/arch/e2k/include/asm/kvm/ptrace.h +++ b/arch/e2k/include/asm/kvm/ptrace.h @@ -53,12 +53,6 @@ typedef enum inject_caller { #ifdef __KERNEL__ -/* some global registers are used to support virtualization mode */ -/* (see usage and real numbers at asm/glob_regs.h) */ -typedef struct host_gregs { - struct e2k_greg g[HOST_KERNEL_GREGS_PAIRS_SIZE]; -} host_gregs_t; - /* * We could check CR.pm and TIR.ip here, but that is not needed * because whenever CR.pm = 1 or TIR.ip < TASK_SIZE, SBR points diff --git a/arch/e2k/include/asm/kvm/pv-emul.h b/arch/e2k/include/asm/kvm/pv-emul.h index 02de005..81479d0 100644 --- a/arch/e2k/include/asm/kvm/pv-emul.h +++ b/arch/e2k/include/asm/kvm/pv-emul.h @@ -134,6 +134,15 @@ static inline bool kvm_vcpu_in_hypercall(struct kvm_vcpu *vcpu) return vcpu->arch.sw_ctxt.in_hypercall; } +static inline void pv_vcpu_clear_gti(struct kvm_vcpu *vcpu) +{ + if (likely(!vcpu->arch.is_hv && vcpu->arch.is_pv)) { + vcpu->arch.gti = NULL; + } else { + KVM_BUG_ON(true); + } +} + static inline gthread_info_t *pv_vcpu_get_gti(struct kvm_vcpu *vcpu) { if (likely(!vcpu->arch.is_hv && vcpu->arch.is_pv)) { @@ -168,6 +177,21 @@ static inline gmm_struct_t *pv_mmu_get_init_gmm(struct kvm *kvm) return kvm->arch.init_gmm; } +static inline void pv_mmu_clear_init_gmm(struct kvm *kvm) +{ + kvm->arch.init_gmm = NULL; +} + +static inline bool pv_mmu_is_init_gmm(struct kvm *kvm, gmm_struct_t *gmm) +{ + if (likely(!kvm->arch.is_hv && kvm->arch.is_pv)) { + return gmm == pv_mmu_get_init_gmm(kvm); + } else { + KVM_BUG_ON(true); + } + return false; +} + static inline gmm_struct_t *pv_vcpu_get_init_gmm(struct kvm_vcpu *vcpu) { return pv_mmu_get_init_gmm(vcpu->kvm); @@ -175,12 +199,7 @@ static inline gmm_struct_t *pv_vcpu_get_init_gmm(struct kvm_vcpu *vcpu) static inline bool pv_vcpu_is_init_gmm(struct kvm_vcpu *vcpu, gmm_struct_t *gmm) { - if (likely(!vcpu->arch.is_hv && vcpu->arch.is_pv)) { - return gmm == pv_vcpu_get_init_gmm(vcpu); - } else { - KVM_BUG_ON(true); - } - return false; + return pv_mmu_is_init_gmm(vcpu->kvm, gmm); } static inline void pv_vcpu_clear_gmm(struct kvm_vcpu *vcpu) @@ -244,6 +263,11 @@ static inline mm_context_t *pv_vcpu_get_gmm_context(struct kvm_vcpu *vcpu) return &pv_vcpu_get_gmm(vcpu)->context; } +static inline cpumask_t *pv_vcpu_get_gmm_cpumask(struct kvm_vcpu *vcpu) +{ + return gmm_cpumask(pv_vcpu_get_gmm(vcpu)); +} + #else /* !CONFIG_VIRTUALIZATION */ static __always_inline void kvm_set_intc_emul_flag(pt_regs_t *regs) diff --git a/arch/e2k/include/asm/kvm/regs_state.h b/arch/e2k/include/asm/kvm/regs_state.h index 01970e1..2b4432a 100644 --- a/arch/e2k/include/asm/kvm/regs_state.h +++ b/arch/e2k/include/asm/kvm/regs_state.h @@ -254,22 +254,6 @@ } \ } \ }) -#define RESTORE_GUEST_USER_REGS_AT_TI(thread_info, gthread_info, restore_upsr) \ -({ \ - thread_info_t *__ti = (thread_info); \ - gthread_info_t *__gti = (gthread_info); \ - host_gregs_t *__greg_pair = &__ti->h_gregs; \ - global_regs_t *__gregs = &__gti->gregs; \ - \ - if (test_ti_thread_flag(__ti, TIF_VIRTUALIZED_GUEST)) { \ - RESTORE_GUEST_KERNEL_GREGS_AT_TI(__ti, __gti, __gregs); \ - RESTORE_GUEST_HOST_GREGS_AT_TI(__greg_pair->g, \ - __gregs->g); \ - if (restore_upsr) { \ - RESTORE_GUEST_USER_UPSR_AT_TI(__ti, __gti); \ - } \ - } \ -}) #define KVM_INIT_GUEST_USER_UPSR(thread_info, __upsr) \ ({ \ thread_info_t *__ti = (thread_info); \ diff --git 
a/arch/e2k/include/asm/kvm/stacks.h b/arch/e2k/include/asm/kvm/stacks.h index 6c237d4..454b098 100644 --- a/arch/e2k/include/asm/kvm/stacks.h +++ b/arch/e2k/include/asm/kvm/stacks.h @@ -13,7 +13,8 @@ /* * Guest kernel thread stacks descriptions */ -#define VIRT_KERNEL_C_STACK_SIZE KVM_GUEST_KERNEL_C_STACK_SIZE +#define VIRT_KERNEL_C_STACK_SIZE \ + (KVM_GUEST_KERNEL_C_STACK_SIZE + KVM_GUEST_KERNEL_C_STACK_OFFSET) #define VIRT_KERNEL_PS_SIZE KVM_GUEST_KERNEL_PS_SIZE #define VIRT_KERNEL_PS_INIT_SIZE KVM_GUEST_KERNEL_PS_INIT_SIZE #define VIRT_KERNEL_PCS_SIZE KVM_GUEST_KERNEL_PCS_SIZE diff --git a/arch/e2k/include/asm/kvm/switch.h b/arch/e2k/include/asm/kvm/switch.h index 92c0b0b..b416e9f 100644 --- a/arch/e2k/include/asm/kvm/switch.h +++ b/arch/e2k/include/asm/kvm/switch.h @@ -216,19 +216,36 @@ static inline void kvm_switch_mmu_tc_regs(struct kvm_sw_cpu_context *sw_ctxt) static inline void kvm_switch_mmu_regs(struct kvm_sw_cpu_context *sw_ctxt, bool switch_tc) { - kvm_switch_mmu_pt_regs(sw_ctxt); - if (switch_tc) + if (likely(!sw_ctxt->no_switch_pt)) { + kvm_switch_mmu_pt_regs(sw_ctxt); + } + if (switch_tc) { kvm_switch_mmu_tc_regs(sw_ctxt); + } } static inline void kvm_switch_to_guest_mmu_pid(struct kvm_vcpu *vcpu) { mm_context_t *gmm_context; - unsigned long mask; + unsigned long mask, flags; + int cpu = raw_smp_processor_id(); + if (unlikely(vcpu->arch.sw_ctxt.no_switch_pt)) { + copy_user_pgd_to_kernel_root_pt( + (pgd_t *)__va(kvm_get_space_type_spt_u_root(vcpu))); + } + raw_all_irq_save(flags); gmm_context = pv_vcpu_get_gmm_context(vcpu); - mask = get_mmu_pid(gmm_context, smp_processor_id()); +#ifdef CONFIG_SMP + /* Stop flush ipis for the guest mm */ + cpumask_set_cpu(cpu, pv_vcpu_get_gmm_cpumask(vcpu)); + /* This barrier could be smp_mb__after_atomic() ..., */ + /* see arch/e2k/include/asm/mmu_context.h */ + smp_mb(); +#endif /* CONFIG_SMP */ + mask = get_mmu_pid(gmm_context, cpu); reload_context_mask(mask); + raw_all_irq_restore(flags); } static inline unsigned long kvm_get_guest_mmu_pid(struct kvm_vcpu *vcpu) @@ -239,12 +256,26 @@ static inline unsigned long kvm_get_guest_mmu_pid(struct kvm_vcpu *vcpu) return gmm_context->cpumsk[smp_processor_id()]; } -static inline void kvm_switch_to_host_mmu_pid(struct mm_struct *mm) +static inline void kvm_switch_to_host_mmu_pid(struct kvm_vcpu *vcpu, + struct mm_struct *mm) { - unsigned long mask; + unsigned long mask, flags; + int cpu = raw_smp_processor_id(); - mask = get_mmu_context(mm, smp_processor_id()); + if (unlikely(vcpu->arch.sw_ctxt.no_switch_pt)) { + copy_user_pgd_to_kernel_root_pt(mm->pgd); + } + raw_all_irq_save(flags); +#ifdef CONFIG_SMP + /* Start receiving flush ipis for the guest mm */ + cpumask_clear_cpu(cpu, pv_vcpu_get_gmm_cpumask(vcpu)); + /* This barrier could be smp_mb__after_atomic() ..., */ + /* see arch/e2k/include/asm/mmu_context.h */ + smp_mb(); +#endif /* CONFIG_SMP */ + mask = get_mmu_context(mm, cpu); reload_context_mask(mask); + raw_all_irq_restore(flags); } static inline void kvm_switch_debug_regs(struct kvm_sw_cpu_context *sw_ctxt, @@ -746,20 +777,6 @@ pv_vcpu_switch_guest_host_context(struct kvm_vcpu *vcpu, pv_vcpu_restore_host_context(vcpu, next_gti); } -static inline void -pv_vcpu_switch_kernel_pgd_range(struct kvm_vcpu *vcpu, int cpu) -{ - hpa_t vcpu_root; - - if (is_sep_virt_spaces(vcpu)) { - vcpu_root = kvm_get_space_type_spt_os_root(vcpu); - } else { - vcpu_root = kvm_get_space_type_spt_u_root(vcpu); - } - - copy_kernel_pgd_range(__va(vcpu_root), the_cpu_pg_dir(cpu)); -} - static inline void 
pv_vcpu_switch_host_context(struct kvm_vcpu *vcpu) { kvm_host_context_t *host_ctxt = &vcpu->arch.host_ctxt; diff --git a/arch/e2k/include/asm/kvm/thread_info.h b/arch/e2k/include/asm/kvm/thread_info.h index 943ad33..1284da2 100644 --- a/arch/e2k/include/asm/kvm/thread_info.h +++ b/arch/e2k/include/asm/kvm/thread_info.h @@ -147,10 +147,10 @@ typedef struct gthread_info { vcpu_l_gregs_t l_gregs; /* guest user "local" global */ /* registers to save updated on page */ /* fault values */ - kernel_gregs_t gk_gregs; /* guest kernel global resiters state */ + kernel_gregs_t gk_gregs; /* guest kernel global registers state */ /* some registers can be updated only */ /* after migration to other VCPU */ - kernel_gregs_t gu_gregs; /* guest user global resiters state */ + kernel_gregs_t gu_gregs; /* guest user global registers state */ /* only for global registers which */ /* used by the guest kernel for its */ /* own purposes */ @@ -332,6 +332,20 @@ static inline int test_gti_thread_flag(gthread_info_t *gti, int flag) return test_the_flag(&gti->flags, flag); } +#define gti_signal_pt_regs_first(__gti) \ +({ \ + struct pt_regs __user *__sig_regs; \ + if (__gti->signal.stack.used) { \ + __sig_regs = &((struct signal_stack_context __user *) \ + (__gti->signal.stack.base + \ + __gti->signal.stack.used - \ + sizeof(struct signal_stack_context)))->regs; \ + } else { \ + __sig_regs = NULL; \ + } \ + __sig_regs; \ +}) + /* * Hardware stacks bounds control */ @@ -534,6 +548,8 @@ get_next_gpt_regs(thread_info_t *ti, gpt_regs_t *gregs) } extern int kvm_pv_guest_thread_info_init(struct kvm *kvm); +extern void kvm_pv_guest_thread_info_reset(struct kvm *kvm); +extern void kvm_pv_guest_thread_info_free(struct kvm *kvm); extern void kvm_pv_guest_thread_info_destroy(struct kvm *kvm); extern void kvm_pv_clear_guest_thread_info(gthread_info_t *gthread_info); diff --git a/arch/e2k/include/asm/kvm/tlbflush.h b/arch/e2k/include/asm/kvm/tlbflush.h index aa86001..8b7372f 100644 --- a/arch/e2k/include/asm/kvm/tlbflush.h +++ b/arch/e2k/include/asm/kvm/tlbflush.h @@ -7,8 +7,9 @@ #define _E2K_KVM_TLBFLUSH_H #include -#include +#include +#include /* * Guest VM support on host @@ -20,146 +21,86 @@ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables */ -#ifndef CONFIG_VIRTUALIZATION -/* it is native kernel without any virtualization */ -static __always_inline bool -__flush_guest_cpu_root_pt_page(struct vm_area_struct *vma, e2k_addr_t addr) -{ - return false; /* none any guests and guest addresses */ -} -static __always_inline bool -__flush_guest_cpu_root_pt_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end) -{ - return false; /* none any guests and guest addresses */ -} -static __always_inline bool -__flush_guest_cpu_root_pt_mm(struct mm_struct *mm) -{ - return false; /* none any guests and guest addresses */ -} -static __always_inline bool -__flush_guest_cpu_root_pt(void) -{ - return false; /* none any guests and guest addresses */ -} -#else /* CONFIG_VIRTUALIZATION */ -extern void kvm_flush_guest_tlb_mm(struct gmm_struct *gmm); -extern void kvm_flush_guest_tlb_page(struct gmm_struct *gmm, e2k_addr_t addr); -extern void kvm_flush_guest_tlb_range(struct gmm_struct *gmm, - e2k_addr_t start, e2k_addr_t end); -extern void kvm_flush_guest_tlb_pgtables(struct gmm_struct *gmm, - e2k_addr_t start, e2k_addr_t end); -extern void kvm_flush_guest_tlb_range_and_pgtables(struct gmm_struct *gmm, - e2k_addr_t start, e2k_addr_t end); +extern void mmu_pv_flush_tlb_address(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + 
e2k_addr_t addr); +extern void mmu_pv_flush_tlb_address_pgtables(struct kvm_vcpu *vcpu, + gmm_struct_t *gmm, + e2k_addr_t addr); +extern void mmu_pv_flush_tlb_page(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + e2k_addr_t addr); +extern void mmu_pv_flush_tlb_mm(struct kvm_vcpu *vcpu, gmm_struct_t *gmm); +extern void mmu_pv_flush_tlb_range(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + const e2k_addr_t start, const e2k_addr_t end); +extern void mmu_pv_flush_pmd_tlb_range(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + unsigned long start, unsigned long end); +extern void mmu_pv_flush_tlb_kernel_range(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + const e2k_addr_t start, const e2k_addr_t end); +extern void mmu_pv_flush_tlb_pgtables(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + e2k_addr_t start, e2k_addr_t end); +extern void mmu_pv_flush_tlb_range_and_pgtables(struct kvm_vcpu *vcpu, + gmm_struct_t *gmm, + e2k_addr_t start, e2k_addr_t end); +extern void mmu_pv_flush_tlb_page_and_pgtables(struct kvm_vcpu *vcpu, + gmm_struct_t *gmm, + unsigned long address); +extern void mmu_pv_flush_cpu_root_pt_mm(struct kvm_vcpu *vcpu, gmm_struct_t *gmm); +extern void mmu_pv_flush_cpu_root_pt(struct kvm_vcpu *vcpu); -/* - * Functions to flush guest CPU root PT on host should return boolean value: - * true if address or MM is from guest VM space and flushing was done - * false if address or MM is not from guest VM space or flushing cannot - * be done - */ -extern bool kvm_do_flush_guest_cpu_root_pt_page(struct vm_area_struct *vma, - e2k_addr_t addr); -extern bool kvm_do_flush_guest_cpu_root_pt_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); -extern bool kvm_do_flush_guest_cpu_root_pt_mm(struct mm_struct *mm); -extern bool kvm_do_flush_guest_cpu_root_pt(void); +extern long kvm_pv_sync_and_flush_tlb(struct kvm_vcpu *vcpu, + mmu_spt_flush_t __user *flush_user); +extern long kvm_pv_sync_addr_range(struct kvm_vcpu *vcpu, + gva_t start_gva, gva_t end_gva); -static inline bool -kvm_flush_guest_cpu_root_pt_page(struct vm_area_struct *vma, e2k_addr_t addr) -{ - if (MMU_IS_SEPARATE_PT()) { - /* cannot be any CPU root PTs */ - return false; - } else if (!test_thread_flag(TIF_VIRTUALIZED_GUEST)) { - /* it is not guest VCPU process on host */ - /* so cannot have guest VM */ - return false; - } else if (paravirt_enabled()) { - /* it is guest process on guest and guest has not own guests */ - return false; - } - return kvm_do_flush_guest_cpu_root_pt_page(vma, addr); -} +extern void mmu_pv_smp_flush_tlb_mm(struct kvm_vcpu *vcpu, gmm_struct_t *gmm); +extern void mmu_pv_smp_flush_tlb_page(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + e2k_addr_t addr); +extern void mmu_pv_smp_flush_tlb_range(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + e2k_addr_t start, e2k_addr_t end); +extern void mmu_pv_smp_flush_pmd_tlb_range(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + e2k_addr_t start, e2k_addr_t end); +extern void mmu_pv_smp_flush_tlb_kernel_range(struct kvm_vcpu *vcpu, + gmm_struct_t *gmm, + e2k_addr_t start, e2k_addr_t end); +extern void mmu_pv_smp_flush_tlb_range_and_pgtables(struct kvm_vcpu *vcpu, + gmm_struct_t *gmm, + e2k_addr_t start, e2k_addr_t end); -static inline bool -kvm_flush_guest_cpu_root_pt_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end) -{ - if (MMU_IS_SEPARATE_PT()) { - /* cannot be any CPU root PTs */ - return false; - } else if (!test_thread_flag(TIF_VIRTUALIZED_GUEST)) { - /* it is not guest VCPU process on host */ - /* so cannot have guest VM */ - return false; - } else if (paravirt_enabled()) { - /* it 
is guest process on guest and guest has not own guests */ - return false; - } - return kvm_do_flush_guest_cpu_root_pt_range(mm, start, end); -} +extern void host_flush_shadow_pt_tlb_range(struct kvm_vcpu *vcpu, + gva_t start, gva_t end, pgprot_t spte, int level); -static inline bool -kvm_flush_guest_cpu_root_pt_mm(struct mm_struct *mm) -{ - if (MMU_IS_SEPARATE_PT()) { - /* cannot be any CPU root PTs */ - return false; - } else if (!test_thread_flag(TIF_VIRTUALIZED_GUEST)) { - /* it is not guest VCPU process on host */ - /* so cannot have guest VM */ - return false; - } else if (paravirt_enabled()) { - /* it is guest process on guest and guest has not own guests */ - return false; - } - return kvm_do_flush_guest_cpu_root_pt_mm(mm); -} +extern void host_flush_shadow_pt_level_tlb(struct kvm *kvm, gmm_struct_t *gmm, + gva_t gva, int level, pgprot_t new_spte, pgprot_t old_spte); -static inline bool -kvm_flush_guest_cpu_root_pt(void) -{ - if (MMU_IS_SEPARATE_PT()) { - /* cannot be any CPU root PTs */ - return false; - } else if (!test_thread_flag(TIF_VIRTUALIZED_GUEST)) { - /* it is not guest VCPU process on host */ - /* so cannot have guest VM */ - return false; - } else if (paravirt_enabled()) { - /* it is guest process on guest and guest has not own guests */ - return false; - } - return kvm_do_flush_guest_cpu_root_pt(); -} +#ifndef CONFIG_SMP +#define host_flush_tlb_mm(vcpu, gmm) \ + mmu_pv_flush_tlb_mm(vcpu, gmm) +#define host_flush_tlb_page(vcpu, gmm, addr) \ + mmu_pv_flush_tlb_page(vcpu, gmm, addr) +#define host_flush_tlb_range(vcpu, gmm, start, end) \ + mmu_pv_flush_tlb_range(vcpu, gmm, start, end) +#define host_flush_pmd_tlb_range(vcpu, gmm, start, end) \ + mmu_pv_flush_pmd_tlb_range(vcpu, gmm, start, end) +#define host_flush_tlb_mm_range(vcpu, gmm, start, end) \ + mmu_pv_flush_tlb_range(vcpu, gmm, start, end) +#define host_flush_tlb_kernel_range(vcpu, gmm, start, end) \ + mmu_pv_flush_tlb_kernel_range(vcpu, gmm, start, end) +#define host_flush_tlb_range_and_pgtables(vcpu, gmm, start, end) \ + mmu_pv_flush_tlb_range_and_pgtables(vcpu, gmm, start, end) +#else /* CONFIG_SMP */ +#define host_flush_tlb_mm(vcpu, gmm) \ + mmu_pv_smp_flush_tlb_mm(vcpu, gmm) +#define host_flush_tlb_page(vcpu, gmm, addr) \ + mmu_pv_smp_flush_tlb_page(vcpu, gmm, addr) +#define host_flush_tlb_range(vcpu, gmm, start, end) \ + mmu_pv_smp_flush_tlb_range(vcpu, gmm, start, end) +#define host_flush_pmd_tlb_range(vcpu, gmm, start, end) \ + mmu_pv_smp_flush_pmd_tlb_range(vcpu, gmm, start, end) +#define host_flush_tlb_kernel_range(vcpu, gmm, start, end) \ + mmu_pv_smp_flush_tlb_kernel_range(vcpu, gmm, start, end) +#define host_flush_tlb_mm_range(vcpu, gmm, start, end) \ + mmu_pv_smp_flush_tlb_range(vcpu, gmm, start, end) +#define host_flush_tlb_range_and_pgtables(vcpu, gmm, start, end) \ + mmu_pv_smp_flush_tlb_range_and_pgtables(vcpu, gmm, start, end) +#endif /* !CONFIG_SMP */ -#ifndef CONFIG_KVM_GUEST_KERNEL -/* it is native host kernel with virtualization support */ -/* or it is paravirtualized host/guest kernel */ -static inline bool -__flush_guest_cpu_root_pt_page(struct vm_area_struct *vma, e2k_addr_t addr) -{ - return kvm_flush_guest_cpu_root_pt_page(vma, addr); -} -static inline bool -__flush_guest_cpu_root_pt_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end) -{ - return kvm_flush_guest_cpu_root_pt_range(mm, start, end); -} -static inline bool -__flush_guest_cpu_root_pt_mm(struct mm_struct *mm) -{ - return kvm_flush_guest_cpu_root_pt_mm(mm); -} -static inline bool -__flush_guest_cpu_root_pt(void) -{ - 
return kvm_flush_guest_cpu_root_pt(); -} - -#endif /* ! CONFIG_KVM_GUEST_KERNEL */ -#endif /* ! CONFIG_VIRTUALIZATION */ #endif /* _E2K_KVM_TLBFLUSH_H */ diff --git a/arch/e2k/include/asm/kvm/trace_kvm_hv.h b/arch/e2k/include/asm/kvm/trace_kvm_hv.h index 0a3de33..63d422c 100644 --- a/arch/e2k/include/asm/kvm/trace_kvm_hv.h +++ b/arch/e2k/include/asm/kvm/trace_kvm_hv.h @@ -88,10 +88,9 @@ TRACE_EVENT( intc, - TP_PROTO(const struct kvm_intc_cpu_context *intc_ctxt, - u64 guest_ip, u64 cpu), + TP_PROTO(const struct kvm_intc_cpu_context *intc_ctxt), - TP_ARGS(intc_ctxt, guest_ip, cpu), + TP_ARGS(intc_ctxt), TP_STRUCT__entry( __field( int, cu_num ) @@ -99,8 +98,6 @@ TRACE_EVENT( __field( u64, cu_hdr_lo ) __array( u64, cu, INTC_INFO_CU_ENTRY_MAX ) __array( u64, mu, INTC_INFO_MU_MAX ) - __field( u64, guest_ip ) - __field( u64, cpu ) ), TP_fast_assign( @@ -139,12 +136,9 @@ TRACE_EVENT( AW(intc_ctxt->mu[i].mask); } } - - __entry->guest_ip = guest_ip; - __entry->cpu = cpu; ), - TP_printk("CPU#%llu, guest IP 0x%llx, cu_num %d, mu_num %d\n" + TP_printk("cu_num %d, mu_num %d\n" "CU header: %s (0x%llx)\n" "CU entry0: %s (0x%llx 0x%llx)\n" "CU entry1: %s (0x%llx 0x%llx)\n" @@ -160,7 +154,6 @@ TRACE_EVENT( "MU entry9: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n" "MU entry10: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n" , - __entry->cpu, __entry->guest_ip, __entry->cu_num, __entry->mu_num, (__entry->cu_num >= 0) ? E2K_TRACE_PRINT_CU_HDR_LO(__entry->cu_hdr_lo) : "(none)", @@ -183,14 +176,12 @@ TRACE_EVENT( TRACE_EVENT( single_mu_intc, - TP_PROTO(const intc_info_mu_t *mu, u64 guest_ip, u64 cpu), + TP_PROTO(const intc_info_mu_t *mu), - TP_ARGS(mu, guest_ip, cpu), + TP_ARGS(mu), TP_STRUCT__entry( __array( u64, mu, INTC_INFO_MU_ITEM_SIZE ) - __field( u64, guest_ip ) - __field( u64, cpu ) ), TP_fast_assign( @@ -201,27 +192,21 @@ TRACE_EVENT( __entry->mu[4] = AW(mu[0].condition); __entry->mu[5] = mu[0].data_ext; __entry->mu[6] = AW(mu[0].mask); - __entry->guest_ip = guest_ip; - __entry->cpu = cpu; ), - TP_printk("CPU#%llu, guest IP: 0x%llx\n" - "MU entry0: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", - __entry->cpu, __entry->guest_ip, + TP_printk("MU entry0: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", E2K_PRINT_INTC_MU_ENTRY(__entry, 1, 0)) ); TRACE_EVENT( double_mu_intc, - TP_PROTO(const intc_info_mu_t *mu, u64 guest_ip, u64 cpu), + TP_PROTO(const intc_info_mu_t *mu), - TP_ARGS(mu, guest_ip, cpu), + TP_ARGS(mu), TP_STRUCT__entry( __array( u64, mu, 2 * INTC_INFO_MU_ITEM_SIZE ) - __field( u64, guest_ip ) - __field( u64, cpu ) ), TP_fast_assign( @@ -242,14 +227,10 @@ TRACE_EVENT( __entry->mu[7 * i + 6] = AW(mu[i].mask); } - __entry->guest_ip = guest_ip; - __entry->cpu = cpu; ), - TP_printk("CPU#%llu, guest IP: 0x%llx\n" - "MU entry0: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n" + TP_printk("MU entry0: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n" "MU entry1: %s (0x%llx), 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", - __entry->cpu, __entry->guest_ip, E2K_PRINT_INTC_MU_ENTRY(__entry, 2, 0), E2K_PRINT_INTC_MU_ENTRY(__entry, 2, 1)) ); @@ -257,25 +238,19 @@ TRACE_EVENT( TRACE_EVENT( single_cu_intc, - TP_PROTO(const intc_info_cu_hdr_t cu_hdr, u64 guest_ip, u64 cpu), + TP_PROTO(const intc_info_cu_hdr_t cu_hdr), - TP_ARGS(cu_hdr, guest_ip, cpu), + TP_ARGS(cu_hdr), TP_STRUCT__entry( __field( u64, cu_hdr_lo ) - __field( u64, guest_ip ) - __field( u64, cpu ) ), TP_fast_assign( __entry->cu_hdr_lo = AW(cu_hdr.lo); - 
__entry->guest_ip = guest_ip; - __entry->cpu = cpu; ), - TP_printk("CPU#%llu, guest IP: 0x%llx\n" - "CU header: %s (0x%llx)\n", - __entry->cpu, __entry->guest_ip, + TP_printk("CU header: %s (0x%llx)\n", E2K_TRACE_PRINT_CU_HDR_LO(__entry->cu_hdr_lo), __entry->cu_hdr_lo) @@ -301,6 +276,60 @@ TRACE_EVENT( __entry->ret) ); +TRACE_EVENT( + intc_stacks, + + TP_PROTO(const kvm_sw_cpu_context_t *sw_ctxt, const kvm_hw_cpu_context_t *hw_ctxt, + const e2k_mem_crs_t *crs), + + TP_ARGS(sw_ctxt, hw_ctxt, crs), + + TP_STRUCT__entry( + /* Stacks */ + __field( u64, sbr ) + __field( u64, usd_lo ) + __field( u64, usd_hi ) + __field( u64, psp_lo ) + __field( u64, psp_hi ) + __field( u64, pcsp_lo ) + __field( u64, pcsp_hi ) + __field( u64, pshtp ) + __field( unsigned int, pcshtp ) + /* CRs */ + __field( u64, cr0_lo ) + __field( u64, cr0_hi ) + __field( u64, cr1_lo ) + __field( u64, cr1_hi ) + ), + + TP_fast_assign( + __entry->sbr = AW(sw_ctxt->sbr); + __entry->usd_lo = AW(sw_ctxt->usd_lo); + __entry->usd_hi = AW(sw_ctxt->usd_hi); + __entry->psp_lo = AW(hw_ctxt->sh_psp_lo); + __entry->psp_hi = AW(hw_ctxt->sh_psp_hi); + __entry->pcsp_lo = AW(hw_ctxt->sh_pcsp_lo); + __entry->pcsp_hi = AW(hw_ctxt->sh_pcsp_hi); + __entry->pshtp = AW(hw_ctxt->sh_pshtp); + __entry->pcshtp = hw_ctxt->sh_pcshtp; + __entry->cr0_lo = AW(crs->cr0_lo); + __entry->cr0_hi = AW(crs->cr0_hi); + __entry->cr1_lo = AW(crs->cr1_lo); + __entry->cr1_hi = AW(crs->cr1_hi); + ), + + TP_printk("sbr 0x%llx, usd_lo 0x%llx, usd_hi 0x%llx\n" + "sh_psp_lo 0x%llx, sh_psp_hi 0x%llx, sh_pcsp_lo 0x%llx, sh_pcsp_hi 0x%llx\n" + "sh_pshtp 0x%llx, sh_pcshtp 0x%x\n" + "cr0_lo 0x%llx, cr0_hi 0x%llx, cr1_lo 0x%llx, cr1_hi 0x%llx\n" + , + __entry->sbr, __entry->usd_lo, __entry->usd_hi, + __entry->psp_lo, __entry->psp_hi, __entry->pcsp_lo, __entry->pcsp_hi, + __entry->pshtp, __entry->pcshtp, + __entry->cr0_lo, __entry->cr0_hi, __entry->cr1_lo, __entry->cr1_hi) + +); + TRACE_EVENT( irq_delivery, diff --git a/arch/e2k/include/asm/kvm/uaccess.h b/arch/e2k/include/asm/kvm/uaccess.h index 2e86d1a..65ffd78 100644 --- a/arch/e2k/include/asm/kvm/uaccess.h +++ b/arch/e2k/include/asm/kvm/uaccess.h @@ -136,7 +136,7 @@ extern int kvm_vcpu_copy_host_from_guest(struct kvm_vcpu *vcpu, static inline int fast_tagged_memory_copy_to_user(void __user *dst, const void *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { @@ -156,7 +156,7 @@ fast_tagged_memory_copy_to_user(void __user *dst, const void *src, static inline int fast_tagged_memory_copy_from_user(void *dst, const void __user *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { diff --git a/arch/e2k/include/asm/kvm_host.h b/arch/e2k/include/asm/kvm_host.h index 28d5489..67277de 100644 --- a/arch/e2k/include/asm/kvm_host.h +++ b/arch/e2k/include/asm/kvm_host.h @@ -361,6 +361,9 @@ typedef struct kvm_mmu_page { pgprot_t *spt; gva_t gva; /* the shadow PT map guest virtual addresses from */ + /* hold the gpa of guest huge page table entry */ + /* for direct shadow page table level */ + gpa_t huge_gpt_gpa; /* hold the gfn of each spte inside spt */ gfn_t *gfns; bool unsync; @@ -390,8 +393,8 @@ typedef struct kvm_mmu_page { atomic_t write_flooding_count; #ifdef CONFIG_GUEST_MM_SPT_LIST struct list_head gmm_entry; /* entry at the gmm list of SPs */ - gmm_struct_t *gmm; /* the gmm in whose list the 
entry */ #endif /* CONFIG_GUEST_MM_SPT_LIST */ + gmm_struct_t *gmm; /* the gmm in whose list the entry */ } kvm_mmu_page_t; /* page fault handling results */ @@ -539,11 +542,13 @@ typedef struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, pgprot_t *spte, const void *pte); void (*sync_gva)(struct kvm_vcpu *vcpu, gva_t gva); - void (*sync_gva_range)(struct kvm_vcpu *vcpu, gva_t gva_start, - gva_t gva_end, bool flush_tlb); + long (*sync_gva_range)(struct kvm_vcpu *vcpu, gmm_struct_t *gmm, + gva_t gva_start, gva_t gva_end); int (*sync_page)(struct kvm_vcpu *vcpu, kvm_mmu_page_t *sp); } kvm_mmu_t; +extern void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp); + typedef struct intc_mu_state { unsigned long notifier_seq; /* 'mmu_notifier_seq' state before */ /* gfn->pfn translation */ @@ -668,6 +673,8 @@ typedef struct kvm_sw_cpu_context { e2k_mem_crs_t crs; /* only for PV guest */ + long ret_value; /* return value from hypercall to guest */ + /* * TODO here goes stuff that can be not switched * on hypercalls if we do not support calling QEMU from them @@ -692,8 +699,6 @@ typedef struct kvm_sw_cpu_context { global_regs_t host_gregs; kernel_gregs_t vcpu_k_gregs; kernel_gregs_t host_k_gregs; - host_gregs_t vcpu_h_gregs; - host_gregs_t host_h_gregs; #endif /* CONFIG_GREGS_CONTEXT */ e2k_cutd_t cutd; @@ -704,6 +709,7 @@ typedef struct kvm_sw_cpu_context { mmu_reg_t tc_hpa; /* host physical base of VCPU */ /* trap cellar */ mmu_reg_t trap_count; + bool no_switch_pt; /* do not switch PT registers */ e2k_dibcr_t dibcr; e2k_ddbcr_t ddbcr; @@ -993,9 +999,10 @@ struct kvm_vcpu_arch { struct kvm_cepic *epic; /* Hardware guest CEPIC support */ - raw_spinlock_t epic_dam_lock; /* lock to update dam_active */ - bool epic_dam_active; + raw_spinlock_t epic_dat_lock; /* lock to update dam_active */ + bool epic_dat_active; struct hrtimer cepic_idle; + ktime_t cepic_idle_start_time; int mp_state; int sipi_vector; @@ -1009,13 +1016,12 @@ struct kvm_vcpu_arch { /* spin lock/unlock */ bool unhalted; /* VCPU was woken up by pv_kick */ bool halted; /* VCPU is halted */ + bool reboot; /* VCPU is rebooted */ bool on_idle; /* VCPU is on idle waiting for some */ /* events for guest */ bool on_spinlock; /* VCPU is on slow spinlock waiting */ bool on_csd_lock; /* VCPU is waiting for csd unlocking */ /* (IPI completion) */ - bool should_stop; /* guest VCPU thread should be */ - /* stopped and completed */ bool virq_wish; /* trap 'last wish' is injection to */ /* pass pending VIRQs to guest */ bool virq_injected; /* interrupt is injected to handle */ @@ -1047,17 +1053,6 @@ struct kvm_vcpu_arch { int64_t ioport_data_size; /* max size of IO port data area */ uint32_t notifier_io; /* IO request notifier */ - bool in_exit_req; /* VCPU is waiting for exit */ - /* request completion */ - /* exit request in progress */ - struct completion exit_req_done; /* exit request is completed */ - - struct list_head exit_reqs_list; /* exit requests list head */ - /* used only on main VCPU */ - struct list_head exit_req; /* the VCPU exit request */ - raw_spinlock_t exit_reqs_lock; /* to lock list of exit */ - /* requests */ - struct work_struct dump_work; /* to schedule work to dump */ /* guest VCPU state */ @@ -1088,7 +1083,6 @@ struct kvm_vcpu_arch { int hard_cpu_id; }; -#ifdef CONFIG_KVM_HV_MMU typedef struct kvm_lpage_info { int disallow_lpage; } kvm_lpage_info_t; @@ -1096,30 +1090,16 @@ typedef struct kvm_lpage_info { typedef struct kvm_arch_memory_slot { kvm_rmap_head_t *rmap[KVM_NR_PAGE_SIZES]; 
kvm_lpage_info_t *lpage_info[KVM_NR_PAGE_SIZES - 1]; + unsigned long page_size; kvm_mem_guest_t guest_areas; unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; } kvm_arch_memory_slot_t; -#else /* ! CONFIG_KVM_HV_MMU */ -struct kvm_lpage_info { - int write_count; -}; - -struct kvm_arch_memory_slot { - unsigned long *rmap; - struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; - kvm_mem_guest_t guest_areas; -}; - -extern struct file_operations kvm_vm_fops; -#endif /* CONFIG_KVM_HV_MMU */ /* * e2k-arch vcpu->requests bit members */ #define KVM_REQ_TRIPLE_FAULT 10 /* FIXME: not implemented */ #define KVM_REQ_MMU_SYNC 11 /* FIXME: not implemented */ -#define KVM_REQ_PENDING_IRQS 15 /* there are unhandled IRQs */ - /* injected on VCPU */ #define KVM_REQ_PENDING_VIRQS 16 /* there are unhandled VIRQs */ /* to inject on VCPU */ #define KVM_REG_SHOW_STATE 17 /* bit should be cleared */ @@ -1138,7 +1118,7 @@ extern struct file_operations kvm_vm_fops; #define kvm_clear_pending_virqs(vcpu) \ clear_bit(KVM_REQ_PENDING_VIRQS, (void *)&vcpu->requests) #define kvm_test_pending_virqs(vcpu) \ - test_bit(KVM_REQ_PENDING_VIRQS, (void *)&vcpu->requests) + test_bit(KVM_REQ_PENDING_VIRQS, (const void *)&vcpu->requests) #define kvm_set_virqs_injected(vcpu) \ set_bit(KVM_REQ_VIRQS_INJECTED, (void *)&vcpu->requests) #define kvm_test_and_clear_virqs_injected(vcpu) \ @@ -1177,6 +1157,8 @@ struct irq_remap_table { struct pci_dev *vfio_dev; }; +#define KVM_ARCH_WANT_MMU_NOTIFIER + struct kvm_arch { unsigned long vm_type; /* virtual machine type */ unsigned long flags; @@ -1193,6 +1175,8 @@ struct kvm_arch { bool tdp_enable; /* two dimensional paging is supported */ /* by hardware MMU and hypervisor */ bool shadow_pt_set_up; /* shadow PT was set up, skip setup on other VCPUs */ + struct mutex spt_sync_lock; + atomic_t vcpus_to_reset; /* atomic counter of VCPUs ready to reset */ kvm_mem_alias_t aliases[KVM_ALIAS_SLOTS]; kvm_kernel_shadow_t shadows[KVM_SHADOW_SLOTS]; kvm_nidmap_t gpid_nidmap[GPIDMAP_ENTRIES]; @@ -1297,6 +1281,10 @@ struct kvm_arch { /* sign of reboot VM, true - reboot */ bool reboot; +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER + struct swait_queue_head mmu_wq; +#endif /* KVM_ARCH_WANT_MMU_NOTIFIER */ + /* lock to update num_sclkr_run and common sh_sclkm3 * for all vcpu-s of the guest */ raw_spinlock_t sh_sclkr_lock; @@ -1316,6 +1304,13 @@ struct kvm_arch { bool legacy_vga_passthrough; }; +static inline bool kvm_has_passthrough_device(const struct kvm_arch *kvm) +{ + if (!kvm->irt) + return false; + return kvm->irt->vfio_dev != NULL; +} + #ifdef CONFIG_KVM_ASYNC_PF /* Async page fault event descriptor */ @@ -1334,6 +1329,8 @@ struct kvm_arch_async_pf { /* and has shadow image address */ #define KVMF_VCPU_STARTED 1 /* VCPUs (one or more) is started */ /* VM real active */ +#define KVMF_ARCH_API_TAKEN 4 /* ioctl() to get KVM arch api version */ + /* was received (to break old versions) */ #define KVMF_IN_SHOW_STATE 8 /* show state of KVM (print all */ /* stacks) is in progress */ #define KVMF_NATIVE_KERNEL 32 /* guest is running native */ @@ -1343,6 +1340,7 @@ struct kvm_arch_async_pf { #define KVMF_LINTEL 40 /* guest is running LIntel */ #define KVMF_PARAVIRT_GUEST_MASK (1UL << KVMF_PARAVIRT_GUEST) #define KVMF_VCPU_STARTED_MASK (1UL << KVMF_VCPU_STARTED) +#define KVMF_ARCH_API_TAKEN_MASK (1UL << KVMF_ARCH_API_TAKEN) #define KVMF_IN_SHOW_STATE_MASK (1UL << KVMF_IN_SHOW_STATE) #define KVMF_NATIVE_KERNEL_MASK (1UL << KVMF_NATIVE_KERNEL) #define KVMF_PARAVIRT_KERNEL_MASK (1UL << KVMF_PARAVIRT_KERNEL) @@ -1404,7 
+1402,11 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) #define SEP_VIRT_ROOT_PT_FLAG (1U << SEP_VIRT_ROOT_PT_BIT) #define DONT_SYNC_ROOT_PT_FLAG (1U << DONT_SYNC_ROOT_PT_BIT) -#define KVM_ARCH_WANT_MMU_NOTIFIER +typedef enum mmu_retry { + NO_MMU_RETRY = 0, + WAIT_FOR_MMU_RETRY, + DO_MMU_RETRY, +} mmu_retry_t; #ifdef KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, unsigned flags); @@ -1434,67 +1436,33 @@ extern void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, !defined(CONFIG_KVM_GUEST_KERNEL) /* it is hypervisor or host with virtualization support */ extern void kvm_hv_epic_load(struct kvm_vcpu *vcpu); -extern void kvm_epic_invalidate_dat(struct kvm_vcpu *vcpu); +extern void kvm_epic_invalidate_dat(struct kvm_vcpu_arch *vcpu); extern void kvm_epic_enable_int(void); extern void kvm_epic_timer_start(void); -extern void kvm_epic_timer_stop(void); +extern void kvm_epic_timer_stop(bool skip_check); extern void kvm_deliver_cepic_epic_interrupt(void); -extern void kvm_epic_check_int_status(struct kvm_vcpu_arch *vcpu); +extern void kvm_epic_vcpu_blocking(struct kvm_vcpu_arch *vcpu); +extern void kvm_epic_vcpu_unblocking(struct kvm_vcpu_arch *vcpu); extern void kvm_init_cepic_idle_timer(struct kvm_vcpu *vcpu); + +#define VCPU_IDLE_TIMEOUT 1 extern void kvm_epic_start_idle_timer(struct kvm_vcpu *vcpu); extern void kvm_epic_stop_idle_timer(struct kvm_vcpu *vcpu); + #else /* ! CONFIG_KVM_HW_VIRTUALIZATION || CONFIG_KVM_GUEST_KERNEL */ /* it is host without virtualization support */ /* or native paravirtualized guest */ -static inline void kvm_hv_epic_load(struct kvm_vcpu *vcpu) -{ - /* nothing to do */ -} - -static inline void kvm_epic_invalidate_dat(struct kvm_vcpu *vcpu) -{ - /* nothing to do */ -} - -static inline void kvm_epic_enable_int(void) -{ - /* nothing to do */ -} - -static inline void kvm_epic_timer_start(void) -{ - /* nothing to do */ -} - -static inline void kvm_epic_timer_stop(void) -{ - /* nothing to do */ -} - -static inline void kvm_deliver_cepic_epic_interrupt(void) -{ - /* nothing to do */ -} - -static inline void kvm_epic_check_int_status(struct kvm_vcpu_arch *vcpu) -{ - /* nothing to do */ -} - -static inline void kvm_init_cepic_idle_timer(struct kvm_vcpu *vcpu) -{ - /* nothing to do */ -} - -static inline void kvm_epic_start_idle_timer(struct kvm_vcpu *vcpu) -{ - /* nothing to do */ -} - -static inline void kvm_epic_stop_idle_timer(struct kvm_vcpu *vcpu) -{ - /* nothing to do */ -} +static inline void kvm_hv_epic_load(struct kvm_vcpu *vcpu) { } +static inline void kvm_epic_invalidate_dat(struct kvm_vcpu_arch *vcpu) { } +static inline void kvm_epic_enable_int(void) { } +static inline void kvm_epic_vcpu_blocking(struct kvm_vcpu_arch *vcpu) { } +static inline void kvm_epic_vcpu_unblocking(struct kvm_vcpu_arch *vcpu) { } +static inline void kvm_epic_timer_start(void) { } +static inline void kvm_epic_timer_stop(bool skip_check) { } +static inline void kvm_deliver_cepic_epic_interrupt(void) { } +static inline void kvm_init_cepic_idle_timer(struct kvm_vcpu *vcpu) { } +static inline void kvm_epic_start_idle_timer(struct kvm_vcpu *vcpu) { } +static inline void kvm_epic_stop_idle_timer(struct kvm_vcpu *vcpu) { } #endif /* CONFIG_KVM_HW_VIRTUALIZATION && !CONFIG_KVM_GUEST_KERNEL */ extern struct work_struct kvm_dump_stacks; diff --git a/arch/e2k/include/asm/machdep.h b/arch/e2k/include/asm/machdep.h index bdb7fe7..346dea5 100644 --- a/arch/e2k/include/asm/machdep.h +++ 
b/arch/e2k/include/asm/machdep.h @@ -14,6 +14,7 @@ #ifdef __KERNEL__ enum { + /* Hardware bugs */ CPU_HWBUG_LARGE_PAGES, CPU_HWBUG_LAPIC_TIMER, CPU_HWBUG_PIO_READS, @@ -42,10 +43,15 @@ enum { CPU_HWBUG_E16C_SLEEP, CPU_HWBUG_L1I_STOPS_WORKING, CPU_HWBUG_CLW_STALE_L1_ENTRY, + CPU_HWBUG_PIPELINE_FREEZE_MONITORS, CPU_HWBUG_C3_WAIT_MA_C, CPU_HWBUG_VIRT_SCLKM3_INTC, - CPU_HWBUG_VIRT_PSIZE_INTERCEPTION, + CPU_HWBUG_VIRT_PUSD_PSL, CPU_HWBUG_USD_ALIGNMENT, + CPU_HWBUG_VIRT_PSIZE_INTERCEPTION, + CPU_NO_HWBUG_SOFT_WAIT, + + /* Features, not bugs */ CPU_FEAT_WC_PCI_PREFETCH, CPU_FEAT_FLUSH_DC_IC, CPU_FEAT_EPIC, @@ -55,6 +61,7 @@ enum { CPU_FEAT_ISET_V3, CPU_FEAT_ISET_V5, CPU_FEAT_ISET_V6, + NR_CPU_FEATURES }; @@ -107,7 +114,6 @@ typedef struct machdep { u8 sic_mc_count; u32 sic_mc1_ecc; u32 sic_io_str1; - u32 clock_tick_rate; unsigned long cpu_features[(NR_CPU_FEATURES + 63) / 64]; @@ -304,31 +310,18 @@ CPUHAS(CPU_HWBUG_DMA_AT_APIC_ADDR, * pending exc_data_debug exceptions. * Workaround - disable data monitor profiling in kernel. */ CPUHAS(CPU_HWBUG_KERNEL_DATA_MONITOR, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP), - false, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL); /* #89495 - write barrier does not work (even for atomics). * Workaround - special command sequence after every read-acquire. */ CPUHAS(CPU_HWBUG_WRITE_MEMORY_BARRIER, !IS_ENABLED(CONFIG_CPU_E8C), false, cpu == IDR_E8C_MDL && revision <= 1); -/* On some processor's revisions writecombine memory - * in prefetchable PCI area is not allowed. */ -CPUHAS(CPU_FEAT_WC_PCI_PREFETCH, - !IS_ENABLED(CONFIG_CPU_ES2), - true, - !((cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL) && - revision == 0)); -/* #82499 - Instruction Cache must be handled carefully - * when flush_dc_line also flushes IC by physical address. */ -CPUHAS(CPU_FEAT_FLUSH_DC_IC, - CONFIG_CPU_ISET != 0, - CONFIG_CPU_ISET >= 3, - iset_ver >= E2K_ISET_V3); /* #89653 - some hw counter won't reset, which may cause corruption of DMA. * Workaround - reset machine until the counter sets in good value */ CPUHAS(CPU_HWBUG_BAD_RESET, @@ -338,12 +331,12 @@ CPUHAS(CPU_HWBUG_BAD_RESET, /* #90514 - hardware hangs after modifying code with a breakpoint. * Workaround - use HS.lng from the instruction being replaced. */ CPUHAS(CPU_HWBUG_BREAKPOINT_INSTR, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E8C2), - false, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E8C2_MDL); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E8C2_MDL); /* #92834, #96516 - hang because of hardware problems. 
* Workaround - boot activates watchdog, kernel should disable it */ CPUHAS(CPU_HWBUG_E8C_WATCHDOG, @@ -367,13 +360,12 @@ CPUHAS(CPU_HWBUG_WC_DAM, /* 96719 - combination of flags s_f=0, store=1, sru=1 is possible * Workaround - treat it as s_f=1, store=1, sru=1 */ CPUHAS(CPU_HWBUG_TRAP_CELLAR_S_F, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2), - false, + IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E8C2), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL && revision == 0); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL && revision == 0); /* #97594 - %cr1_lo.ss flag is lost if ext. interrupt arrives faster. * Workaround - manually set %cr1_lo.ss again in interrupt handler */ CPUHAS(CPU_HWBUG_SS, @@ -388,54 +380,53 @@ CPUHAS(CPU_HWBUG_SS, /* #99302 - %aaldv sometimes is not restored properly. * Workaround - insert 'wait ma_c' barrier */ CPUHAS(CPU_HWBUG_AAU_AALDV, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2), - false, + IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E8C2), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL && revision == 0); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL && revision == 0); /* #103223 - LAPIC does not send EoI to IO_APIC for level interrupts. 
* Workaround - wait under closed interrupts until APIC_ISR clears */ CPUHAS(CPU_HWBUG_LEVEL_EOI, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2), - false, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP) || + IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL); /* #104865 - hardware might generate a false single step interrupt * Workaround - clean frame 0 of PCS during the allocation */ CPUHAS(CPU_HWBUG_FALSE_SS, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2), - false, + IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E2S) && + !IS_ENABLED(CONFIG_CPU_E8C), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E1CP) || + IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL && revision <= 2 || - cpu == IDR_E8C_MDL && revision <= 2 || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL); + cpu == IDR_E2S_MDL && revision <= 2 || + cpu == IDR_E8C_MDL && revision <= 2 || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL); /* #117649 - false exc_data_debug are generated based on _previous_ * values in ld/st address registers. * Workaround - forbid data breakpoint on the first 31 bytes * (hardware prefetch works with 32 bytes blocks). */ CPUHAS(CPU_HWBUG_SPURIOUS_EXC_DATA_DEBUG, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2) && !IS_ENABLED(CONFIG_CPU_E16C) && - !IS_ENABLED(CONFIG_CPU_E2C3), - false, + IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E16C) && + !IS_ENABLED(CONFIG_CPU_E2C3), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP) || + IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL || - cpu == IDR_E16C_MDL && revision == 0 || - cpu == IDR_E2C3_MDL && revision == 0); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL || + cpu == IDR_E16C_MDL && revision == 0 || + cpu == IDR_E2C3_MDL && revision == 0); /* #119084 - several TBL flushes in a row might fail to flush L1D. * Workaround - insert "wait fl_c" immediately after every TLB flush */ CPUHAS(CPU_HWBUG_TLB_FLUSH_L1D, - !IS_ENABLED(CONFIG_CPU_E8C2), - false, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_E8C2_MDL); /* #121311 - asynchronous entries in INTC_INFO_MU always have "pm" bit set. * Workaround - use "pm" bit saved in guest's chain stack. */ @@ -445,7 +436,7 @@ CPUHAS(CPU_HWBUG_GUEST_ASYNC_PM, cpu == IDR_E16C_MDL && revision == 0 || cpu == IDR_E2C3_MDL && revision == 0); /* #122946 - conflict new interrupt while sync signal turning off. 
- * Workaround - wating for C0 after E2K_WAIT_V6 */ + * Workaround - waiting for C0 after "wait int=1" */ CPUHAS(CPU_HWBUG_E16C_SLEEP, !IS_ENABLED(CONFIG_CPU_E16C), false, @@ -454,32 +445,40 @@ CPUHAS(CPU_HWBUG_E16C_SLEEP, * Workaround - prepare %ctpr's in glaunch/trap handler entry; * avoid rbranch in glaunch/trap handler entry and exit. */ CPUHAS(CPU_HWBUG_L1I_STOPS_WORKING, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2) && !IS_ENABLED(CONFIG_CPU_E16C) && - !IS_ENABLED(CONFIG_CPU_E2C3), - false, + IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E16C) && + !IS_ENABLED(CONFIG_CPU_E2C3), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP) || + IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL || - cpu == IDR_E16C_MDL && revision == 0 || - cpu == IDR_E2C3_MDL && revision == 0); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL || + cpu == IDR_E16C_MDL && revision == 0 || + cpu == IDR_E2C3_MDL && revision == 0); /* #124947 - CLW clearing by OS must be done on the same CPU that started the * hardware clearing operation to avoid creating a stale L1 entry. * Workaround - forbid migration until CLW clearing is finished in software. */ CPUHAS(CPU_HWBUG_CLW_STALE_L1_ENTRY, - !IS_ENABLED(CONFIG_CPU_E2S) && !IS_ENABLED(CONFIG_CPU_E8C) && - !IS_ENABLED(CONFIG_CPU_E8C2) && !IS_ENABLED(CONFIG_CPU_E16C), - false, + IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E16C), + IS_ENABLED(CONFIG_CPU_E2S) || IS_ENABLED(CONFIG_CPU_E8C) || + IS_ENABLED(CONFIG_CPU_E8C2), cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || cpu == IDR_E8C2_MDL || - cpu == IDR_E16C_MDL && revision == 0); + cpu == IDR_E16C_MDL && revision == 0); +/* #125405 - CPU pipeline freeze feature conflicts with performance monitoring. + * Workaround - disable pipeline freeze when monitoring is enabled. */ +CPUHAS(CPU_HWBUG_PIPELINE_FREEZE_MONITORS, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_E8C2) || IS_ENABLED(CONFIG_CPU_E16C) || + IS_ENABLED(CONFIG_CPU_E2C3) || IS_ENABLED(CONFIG_CPU_E12C), + cpu == IDR_E8C2_MDL || cpu == IDR_E16C_MDL || + cpu == IDR_E2C3_MDL || cpu == IDR_E12C_MDL); /* #126587 - "wait ma_c=1" does not wait for all L2$ writebacks to complete * when disabling CPU core with "wait trap=1" algorithm. * Workaround - manually insert 66 NOPs before "wait trap=1" */ CPUHAS(CPU_HWBUG_C3_WAIT_MA_C, - !IS_ENABLED(CONFIG_CPU_E2S) && !IS_ENABLED(CONFIG_CPU_E8C) && - !IS_ENABLED(CONFIG_CPU_E1CP), - false, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_E2S) || IS_ENABLED(CONFIG_CPU_E8C) || + IS_ENABLED(CONFIG_CPU_E1CP), cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || cpu == IDR_E1CP_MDL); /* #128127 - Intercepting SCLKM3 write does not prevent guest from writing it. * Workaround - Update SH_SCLKM3 in intercept handler */ @@ -488,10 +487,9 @@ CPUHAS(CPU_HWBUG_VIRT_SCLKM3_INTC, false, cpu == IDR_E16C_MDL && revision == 0 || cpu == IDR_E2C3_MDL && revision == 0); -/* #130039 - intercepting some specific sequences of call/return/setwd - * (that change WD.psize in a specific way) does not work. - * Workaround - avoid those sequences. 
*/ -CPUHAS(CPU_HWBUG_VIRT_PSIZE_INTERCEPTION, +/* #128350 - glaunch increases guest's pusd.psl by 1 on phase 1 + * Workaround - decrease guest's pusd.psl by 1 before glaunch */ +CPUHAS(CPU_HWBUG_VIRT_PUSD_PSL, !IS_ENABLED(CONFIG_CPU_E16C) && !IS_ENABLED(CONFIG_CPU_E2C3), false, cpu == IDR_E16C_MDL && revision == 0 || @@ -500,16 +498,50 @@ CPUHAS(CPU_HWBUG_VIRT_PSIZE_INTERCEPTION, * Workaround - write usd_lo before usd_hi, while keeping 2 tact distance from sbr write. * Valid sequences are: sbr, nop, usd.lo, usd.hi OR sbr, usd.lo, usd.hi, usd.lo */ CPUHAS(CPU_HWBUG_USD_ALIGNMENT, - !IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) && - !IS_ENABLED(CONFIG_CPU_E8C) && !IS_ENABLED(CONFIG_CPU_E1CP) && - !IS_ENABLED(CONFIG_CPU_E8C2) && !IS_ENABLED(CONFIG_CPU_E16C) && - !IS_ENABLED(CONFIG_CPU_E2C3) && !IS_ENABLED(CONFIG_CPU_E12C), - false, + IS_ENABLED(CONFIG_E2K_MACHINE), + IS_ENABLED(CONFIG_CPU_ES2) || IS_ENABLED(CONFIG_CPU_E2S) || + IS_ENABLED(CONFIG_CPU_E8C) || IS_ENABLED(CONFIG_CPU_E1CP) || + IS_ENABLED(CONFIG_CPU_E8C2) || IS_ENABLED(CONFIG_CPU_E16C) || + IS_ENABLED(CONFIG_CPU_E2C3) || IS_ENABLED(CONFIG_CPU_E12C), cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL || - cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || - cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL || - cpu == IDR_E16C_MDL || cpu == IDR_E2C3_MDL || - cpu == IDR_E12C_MDL); + cpu == IDR_E2S_MDL || cpu == IDR_E8C_MDL || + cpu == IDR_E1CP_MDL || cpu == IDR_E8C2_MDL || + cpu == IDR_E16C_MDL || cpu == IDR_E2C3_MDL || + cpu == IDR_E12C_MDL); +/* #130039 - intercepting some specific sequences of call/return/setwd + * (that change WD.psize in a specific way) does not work. + * Workaround - avoid those sequences. */ +CPUHAS(CPU_HWBUG_VIRT_PSIZE_INTERCEPTION, + !IS_ENABLED(CONFIG_CPU_E16C) && !IS_ENABLED(CONFIG_CPU_E2C3), + false, + cpu == IDR_E16C_MDL && revision == 0 || + cpu == IDR_E2C3_MDL && revision == 0); + +/* #130066, #134351 - L1/L2 do not respect "lal"/"las"/"sas"/"st_rel" barriers. + * Workaround - do not use "las"/"sas"/"st_rel", and add 5 nops after "lal". */ +CPUHAS(CPU_NO_HWBUG_SOFT_WAIT, + !IS_ENABLED(CONFIG_CPU_E16C) && !IS_ENABLED(CONFIG_CPU_E2C3), + true, + !(cpu == IDR_E16C_MDL && revision == 0 || + cpu == IDR_E2C3_MDL && revision == 0)); + +/* + * Not bugs but features go here + */ + +/* On some processor's revisions writecombine memory + * in prefetchable PCI area is not allowed. */ +CPUHAS(CPU_FEAT_WC_PCI_PREFETCH, + !IS_ENABLED(CONFIG_CPU_ES2), + true, + !((cpu == IDR_ES2_DSP_MDL || cpu == IDR_ES2_RU_MDL) && + revision == 0)); +/* #82499 - Instruction Cache must be handled carefully + * when flush_dc_line also flushes IC by physical address. */ +CPUHAS(CPU_FEAT_FLUSH_DC_IC, + CONFIG_CPU_ISET != 0, + CONFIG_CPU_ISET >= 3, + iset_ver >= E2K_ISET_V3); /* Rely on IDR instead of iset version to choose between APIC and EPIC. * For guest we use it's own fake IDR so that we choose between APIC and * EPIC based on what hardware guest *thinks* it's being executed on. 
*/ @@ -611,7 +643,12 @@ extern __nodedata pt_struct_t pgtable_struct; # endif /* E2K_P2V */ #endif /* CONFIG_CPU_ISET 0-6 */ -#define IS_HV_GM() (machine.gmi) +/* Returns true in guest running with hardware virtualization support */ +#if CONFIG_CPU_ISET >= 3 && !defined E2K_P2V +# define IS_HV_GM() (cpu_has(CPU_FEAT_ISET_V6) && READ_CORE_MODE_REG().gmi) +#else +# define IS_HV_GM() (machine.gmi) +#endif extern void save_kernel_gregs_v2(struct kernel_gregs *); extern void save_kernel_gregs_v5(struct kernel_gregs *); @@ -691,8 +728,8 @@ void get_and_invalidate_MLT_context_v3(e2k_mlt_t *mlt_state); void get_and_invalidate_MLT_context_v6(e2k_mlt_t *mlt_state); #ifdef CONFIG_SMP -void clock_off_v3(void); -void clock_on_v3(int cpu); +void native_clock_off_v3(void); +void native_clock_on_v3(int cpu); #endif void C1_enter_v2(void); diff --git a/arch/e2k/include/asm/mmu.h b/arch/e2k/include/asm/mmu.h index 9c9ba40..e9f4bce 100644 --- a/arch/e2k/include/asm/mmu.h +++ b/arch/e2k/include/asm/mmu.h @@ -179,9 +179,9 @@ extern long hw_context_lookup_pcsp_and_switch(e2k_pcsp_lo_t pcsp_lo, extern int hw_contexts_init(struct task_struct *p, mm_context_t *mm_context, bool is_fork); extern void hw_contexts_destroy(mm_context_t *mm_context); -extern long do_swapcontext(void __user *oucp, const void __user *ucp, - bool save_prev_ctx, int format); +extern long swapcontext(const void __user *ucp, int format); extern void makecontext_trampoline(void); +extern void makecontext_trampoline_protected(void); extern void makecontext_trampoline_continue(void); extern void hw_context_deactivate_mm(struct task_struct *dead_task); diff --git a/arch/e2k/include/asm/mmu_context.h b/arch/e2k/include/asm/mmu_context.h index c417d88..bb45a57 100644 --- a/arch/e2k/include/asm/mmu_context.h +++ b/arch/e2k/include/asm/mmu_context.h @@ -159,30 +159,12 @@ reload_mmu_context(struct mm_struct *mm) reload_context_mask(ctx); raw_all_irq_restore(flags); } -static inline void -invalidate_mmu_context(struct mm_struct *mm) -{ - int cpu = raw_smp_processor_id(); -#ifdef CONFIG_SMP - /* - * Remove this cpu from mm_cpumask. This might be - * needed, for example, after sys_io_setup() if the - * kernel thread which was using this mm received - * flush ipi (unuse_mm() does not clear mm_cpumask). - * And maybe there are other such places where - * a kernel thread uses user mm. 
- */ - cpumask_clear_cpu(cpu, mm_cpumask(mm)); -#endif - mm->context.cpumsk[cpu] = 0; -} extern inline void enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk) { } - extern int __init_new_context(struct task_struct *p, struct mm_struct *mm, mm_context_t *context); static inline int init_new_context(struct task_struct *p, struct mm_struct *mm) diff --git a/arch/e2k/include/asm/mmu_regs.h b/arch/e2k/include/asm/mmu_regs.h index 8fd7e0b..cb51ad3 100644 --- a/arch/e2k/include/asm/mmu_regs.h +++ b/arch/e2k/include/asm/mmu_regs.h @@ -707,9 +707,9 @@ boot_native_invalidate_CACHE_L12(void) static inline void native_raw_write_back_CACHE_L12(void) { - __E2K_WAIT(E2K_WAIT_OP_MA_C_MASK); + __E2K_WAIT(_ma_c); NATIVE_FLUSH_CACHE_L12(_flush_op_write_back_cache_L12); - __E2K_WAIT(E2K_WAIT_OP_FL_C_MASK | E2K_WAIT_OP_MA_C_MASK); + __E2K_WAIT(_fl_c | _ma_c); } static inline void @@ -727,9 +727,9 @@ write_back_CACHE_L12(void) static inline void native_raw_flush_TLB_all(void) { - __E2K_WAIT(E2K_WAIT_OP_ST_C_MASK); + __E2K_WAIT(_st_c); NATIVE_FLUSH_TLB_ALL(_flush_op_tlb_all); - __E2K_WAIT(E2K_WAIT_OP_FL_C_MASK | E2K_WAIT_OP_MA_C_MASK); + __E2K_WAIT(_fl_c | _ma_c); } static inline void diff --git a/arch/e2k/include/asm/mmu_types.h b/arch/e2k/include/asm/mmu_types.h index c991e50..8caa61e 100644 --- a/arch/e2k/include/asm/mmu_types.h +++ b/arch/e2k/include/asm/mmu_types.h @@ -219,6 +219,8 @@ typedef struct pt_struct { pgprotval_t priv_mask; /* page is privileged */ pgprotval_t non_exec_mask; /* page is not executable */ pgprotval_t exec_mask; /* page is executable */ + pgprotval_t huge_mask; /* page is huge */ + pgprotval_t protnone_mask; /* page is with none protections */ /* mask of bits available for software */ pgprotval_t sw_bit1_mask; /* # 1 */ @@ -818,5 +820,9 @@ typedef union { #define TAGGED_MEM_STORE_REC_OPC_W (LDST_WORD_FMT << LDST_REC_OPC_FMT_SHIFT) #define MEM_STORE_REC_OPC_B (LDST_BYTE_FMT << LDST_REC_OPC_FMT_SHIFT) +/* prefetch flag (placed at deprecated field 'rg_deprecated' */ +#define LDST_PREFETCH_FLAG_SET(flag) ((flag) << LDST_REC_OPC_RG_SHIFT) +#define LDST_PREFETCH_FLAG_GET(strd) (((strd) >> LDST_REC_OPC_RG_SHIFT) & 0x1UL) +#define LDST_PREFETCH_FLAG_CLEAR(strd) ((strd) & ~LDST_PREFETCH_FLAG_SET(1UL)) #endif /* _E2K_MMU_TYPES_H_ */ diff --git a/arch/e2k/include/asm/native_cpu_regs_access.h b/arch/e2k/include/asm/native_cpu_regs_access.h index 634ddeb..5c63a6c 100644 --- a/arch/e2k/include/asm/native_cpu_regs_access.h +++ b/arch/e2k/include/asm/native_cpu_regs_access.h @@ -424,10 +424,50 @@ extern void native_write_SCLKM2_reg_value(unsigned long reg_value); NATIVE_SET_SREG_CLOSED_NOEXC(dibcr, DIBCR_value, 4) #define NATIVE_WRITE_DIBSR_REG_VALUE(DIBSR_value) \ NATIVE_SET_SREG_CLOSED_NOEXC(dibsr, DIBSR_value, 4) -/* 6 cycles delay guarantess that all counting - * is stopped and %dibsr is updated accordingly. 
*/ -#define NATIVE_WRITE_DIMCR_REG_VALUE(DIMCR_value) \ - NATIVE_SET_DSREG_CLOSED_NOEXC(dimcr, DIMCR_value, 5) + +static inline bool is_event_pipe_frz_sensitive(int event) +{ + return event == 0x2e || + event >= 0x30 && event <= 0x3d || + event >= 0x48 && event <= 0x4a || + event >= 0x58 && event <= 0x5a || + event >= 0x68 && event <= 0x69; +} + +static inline bool is_dimcr_pipe_frz_sensitive(e2k_dimcr_t dimcr) +{ + return dimcr_enabled(dimcr, 0) && + is_event_pipe_frz_sensitive(AS(dimcr)[0].event) || + dimcr_enabled(dimcr, 1) && + is_event_pipe_frz_sensitive(AS(dimcr)[1].event); +} + +#define NATIVE_WRITE_DIMCR_REG_VALUE(DIMCR_value) \ +do { \ + e2k_dimcr_t __new_value = { .word = (DIMCR_value) }; \ + \ + if (cpu_has(CPU_HWBUG_PIPELINE_FREEZE_MONITORS)) { \ + e2k_dimcr_t __old_value = { .word = NATIVE_READ_DIMCR_REG_VALUE() }; \ + bool __old_sensitive = is_dimcr_pipe_frz_sensitive(__old_value); \ + bool __new_sensitive = is_dimcr_pipe_frz_sensitive(__new_value); \ + \ + if (__old_sensitive != __new_sensitive) { \ + unsigned long flags; \ + \ + raw_all_irq_save(flags); \ + \ + e2k_cu_hw0_t cu_hw0 = { .word = NATIVE_READ_CU_HW0_REG_VALUE() }; \ + cu_hw0.pipe_frz_dsbl = (__new_sensitive) ? 1 : 0; \ + NATIVE_WRITE_CU_HW0_REG_VALUE(cu_hw0.word); \ + \ + raw_all_irq_restore(flags); \ + } \ + } \ + \ + /* 6 cycles delay guarantess that all counting \ + * is stopped and %dibsr is updated accordingly. */ \ + NATIVE_SET_DSREG_CLOSED_NOEXC(dimcr, AW(__new_value), 5); \ +} while (0) #define NATIVE_WRITE_DIBAR0_REG_VALUE(DIBAR0_value) \ NATIVE_SET_DSREG_CLOSED_NOEXC(dibar0, DIBAR0_value, 4) #define NATIVE_WRITE_DIBAR1_REG_VALUE(DIBAR1_value) \ diff --git a/arch/e2k/include/asm/nbsr_v6_regs.h b/arch/e2k/include/asm/nbsr_v6_regs.h index 41f8e1c..e94c507 100644 --- a/arch/e2k/include/asm/nbsr_v6_regs.h +++ b/arch/e2k/include/asm/nbsr_v6_regs.h @@ -64,6 +64,9 @@ /* MC monitors */ #define MC_CH 0x400 +#define MC_CFG 0x418 +#define MC_OPMB 0x424 +#define MC_ECC 0x440 #define MC_STATUS 0x44c #define MC_MON_CTL 0x450 #define MC_MON_CTR0 0x454 @@ -73,18 +76,22 @@ /* HMU monitors */ #define HMU_MIC 0xd00 #define HMU_MCR 0xd14 +#define HMU0_INT 0xd40 #define HMU0_MAR0_LO 0xd44 #define HMU0_MAR0_HI 0xd48 #define HMU0_MAR1_LO 0xd4c #define HMU0_MAR1_HI 0xd50 +#define HMU1_INT 0xd70 #define HMU1_MAR0_LO 0xd74 #define HMU1_MAR0_HI 0xd78 #define HMU1_MAR1_LO 0xd7c #define HMU1_MAR1_HI 0xd80 +#define HMU2_INT 0xda0 #define HMU2_MAR0_LO 0xda4 #define HMU2_MAR0_HI 0xda8 #define HMU2_MAR1_LO 0xdac #define HMU2_MAR1_HI 0xdb0 +#define HMU3_INT 0xdd0 #define HMU3_MAR0_LO 0xdd4 #define HMU3_MAR0_HI 0xdd8 #define HMU3_MAR1_LO 0xddc diff --git a/arch/e2k/include/asm/override-lcc-warnings.h b/arch/e2k/include/asm/override-lcc-warnings.h index d973752..2fc9cad 100644 --- a/arch/e2k/include/asm/override-lcc-warnings.h +++ b/arch/e2k/include/asm/override-lcc-warnings.h @@ -1,13 +1,5 @@ -/* identifier-list parameters may only be used in a function definition */ -#pragma diag_suppress 92 - -#pragma diag_suppress 1717 - /* in 'goto *expr', expr must have type 'void *' (lcc bug #121409) */ #pragma diag_suppress 1101 /* array of elements containing a flexible array member is nonstandard */ #pragma diag_suppress 1717 - -/* a reduction in alignment without the 'packed' attribute is ignored */ -#pragma diag_suppress 1160 diff --git a/arch/e2k/include/asm/p2v/boot_smp.h b/arch/e2k/include/asm/p2v/boot_smp.h index 25f55db..4a861ee 100644 --- a/arch/e2k/include/asm/p2v/boot_smp.h +++ b/arch/e2k/include/asm/p2v/boot_smp.h @@ -214,7 +214,11 @@ 
extern int cpu_to_sync_num; /* real number of CPUs to make */ boot_test_phys_cpu(cpu, *boot_phys_cpu_present_map_p) #define boot_phys_cpu_present_num boot_get_vo_value(phys_cpu_present_num) -#define boot_cpu_to_sync_num boot_get_vo_value(cpu_to_sync_num) +#ifdef CONFIG_SMP +# define boot_cpu_to_sync_num boot_get_vo_value(cpu_to_sync_num) +#else +# define boot_cpu_to_sync_num 0 +#endif #ifdef CONFIG_NUMA extern atomic_t early_node_has_dup_kernel_num; diff --git a/arch/e2k/include/asm/perf_event_types.h b/arch/e2k/include/asm/perf_event_types.h index 7ffd1c2..c689c6c 100644 --- a/arch/e2k/include/asm/perf_event_types.h +++ b/arch/e2k/include/asm/perf_event_types.h @@ -3,12 +3,8 @@ #include #include -#define PERF_EVENT_INDEX_OFFSET 0 - -#define ARCH_PERFMON_EVENT_MASK 0xffff #define ARCH_PERFMON_OS (1 << 16) #define ARCH_PERFMON_USR (1 << 17) -#define ARCH_PERFMON_ENABLED (1 << 18) DECLARE_PER_CPU(struct perf_event * [4], cpu_events); diff --git a/arch/e2k/include/asm/pgatomic.h b/arch/e2k/include/asm/pgatomic.h index 7b3b7a1..252af50 100644 --- a/arch/e2k/include/asm/pgatomic.h +++ b/arch/e2k/include/asm/pgatomic.h @@ -63,6 +63,15 @@ native_pt_get_and_xchg_atomic(pgprotval_t newval, pgprotval_t *pgprot) return oldval; } +static inline pgprotval_t +native_pt_get_and_xchg_relaxed(pgprotval_t newval, pgprotval_t *pgprot) +{ + pgprotval_t oldval = xchg_relaxed(pgprot, newval); + trace_pt_update("pt_get_and_xchg_relaxed: entry at 0x%lx: 0x%lx -> 0x%lx\n", + pgprot, oldval, newval); + return oldval; +} + static inline pgprotval_t native_pt_clear_relaxed_atomic(pgprotval_t mask, pgprotval_t *pgprot) { @@ -114,6 +123,13 @@ pt_get_and_xchg_atomic(struct mm_struct *mm, unsigned long addr, return native_pt_get_and_xchg_atomic(newval, &pgprot->pgprot); } +static inline pgprotval_t +pt_get_and_xchg_relaxed(struct mm_struct *mm, unsigned long addr, + pgprotval_t newval, pgprot_t *pgprot) +{ + return native_pt_get_and_xchg_relaxed(newval, &pgprot->pgprot); +} + static inline pgprotval_t pt_clear_relaxed_atomic(pgprotval_t mask, pgprot_t *pgprot) { diff --git a/arch/e2k/include/asm/pgd.h b/arch/e2k/include/asm/pgd.h index c9e6593..14bf67a 100644 --- a/arch/e2k/include/asm/pgd.h +++ b/arch/e2k/include/asm/pgd.h @@ -288,6 +288,12 @@ copy_user_pgd_to_kernel_root_pt(pgd_t *user_pgd) { copy_user_pgd_to_kernel_pgd(cpu_kernel_root_pt, user_pgd); } +#else /* !CONFIG_COPY_USER_PGD_TO_KERNEL_ROOT_PT */ +static inline void +copy_user_pgd_to_kernel_root_pt(pgd_t *user_pgd) +{ + BUG_ON(true); +} #endif /* CONFIG_COPY_USER_PGD_TO_KERNEL_ROOT_PT */ #endif /* _E2K_PGD_H */ diff --git a/arch/e2k/include/asm/pgtable.h b/arch/e2k/include/asm/pgtable.h index 18ff8c3..df9d79b 100644 --- a/arch/e2k/include/asm/pgtable.h +++ b/arch/e2k/include/asm/pgtable.h @@ -542,7 +542,7 @@ extern pgd_t *node_pgd_offset_kernel(int nid, e2k_addr_t virt_addr); #else /* ! 
CONFIG_NUMA */ #define node_pgd_offset_kernel(nid, virt_addr) \ ({ \ - (nid); \ + (void) (nid); \ pgd_offset_k(virt_addr); \ }) #endif /* CONFIG_NUMA */ @@ -617,7 +617,7 @@ pgd_clear(pgd_t *pgd) #define pte_offset_map(pmd, address) \ ({ \ pte_t *__pom_pte = pte_offset_kernel((pmd), (address)); \ - prefetchw(__pom_pte); \ + prefetch_nospec(__pom_pte); \ __pom_pte; \ }) @@ -805,7 +805,8 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { - return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); + return __pmd(pt_get_and_xchg_relaxed(vma->vm_mm, address, + pmd_val(pmd), (pgprot_t *)pmdp)); } extern int pmdp_set_access_flags(struct vm_area_struct *vma, diff --git a/arch/e2k/include/asm/pgtable_def.h b/arch/e2k/include/asm/pgtable_def.h index fae0bb2..1271c88 100644 --- a/arch/e2k/include/asm/pgtable_def.h +++ b/arch/e2k/include/asm/pgtable_def.h @@ -31,7 +31,7 @@ do { \ trace_printk(__VA_ARGS__); \ } while (0) #else -# define trace_pt_update(...) +# define trace_pt_update(...) do { } while (0) #endif #ifndef __ASSEMBLY__ @@ -1049,6 +1049,7 @@ static inline int pmd_bad(pmd_t pmd) (is_huge_pmd_level() && _PAGE_TEST_HUGE(pmd_val(pmd))) #ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define PMD_THP_INVALIDATE_FLAGS (UNI_PAGE_PRESENT | UNI_PAGE_PROTNONE) #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { @@ -1056,6 +1057,8 @@ static inline int has_transparent_hugepage(void) } #define pmd_trans_huge(pmd) user_pmd_huge(pmd) +#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ +#define PMD_THP_INVALIDATE_FLAGS 0UL #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* @@ -1083,8 +1086,7 @@ static inline int has_transparent_hugepage(void) #define pmd_mk_present_valid(pmd) (__pmd(_PAGE_SET(pmd_val(pmd), \ UNI_PAGE_PRESENT | UNI_PAGE_VALID))) #define pmd_mknotpresent(pmd) \ - (__pmd(_PAGE_CLEAR(pmd_val(pmd), \ - UNI_PAGE_PRESENT | UNI_PAGE_PROTNONE))) + (__pmd(_PAGE_CLEAR(pmd_val(pmd), PMD_THP_INVALIDATE_FLAGS))) #define pmd_mknot_present_valid(pmd) (__pmd(_PAGE_CLEAR(pmd_val(pmd), \ UNI_PAGE_PRESENT | UNI_PAGE_PROTNONE | UNI_PAGE_VALID))) #define pmd_mknotvalid(pmd) (__pmd(_PAGE_CLEAR_VALID(pmd_val(pmd)))) diff --git a/arch/e2k/include/asm/processor.h b/arch/e2k/include/asm/processor.h index 1b2ce4f..71cdd2a 100644 --- a/arch/e2k/include/asm/processor.h +++ b/arch/e2k/include/asm/processor.h @@ -256,16 +256,18 @@ unsigned long get_wchan(struct task_struct *p); #define ARCH_HAS_PREFETCH static inline void prefetch(const void *ptr) { - /* Use fully speculative load since ptr could be bad */ E2K_PREFETCH_L1_SPEC(ptr); } #define ARCH_HAS_PREFETCHW static inline void prefetchw(const void *ptr) { - /* prefetchw() is used when ptr is good, thus - * we can use half-speculative load */ - E2K_PREFETCH_L1(ptr); + E2K_PREFETCH_L1_SPEC(ptr); +} + +static inline void prefetch_nospec(const void *ptr) +{ + E2K_PREFETCH_L1_NOSPEC(ptr); } #define prefetch_offset(ptr, offset) \ @@ -274,68 +276,67 @@ do { \ E2K_PREFETCH_L1_SPEC_OFFSET((ptr), (offset)); \ } while (0) -#define prefetchw_offset(ptr, offset) \ +#define prefetch_nospec_offset(ptr, offset) \ do { \ - E2K_PREFETCH_L2_OFFSET((ptr), (offset)); \ + E2K_PREFETCH_L2_NOSPEC_OFFSET((ptr), (offset)); \ } while (0) /* Use L2 cache line size since we are prefetching to L2 */ #define PREFETCH_STRIDE 64 -static __always_inline void prefetchw_range(const void *addr, size_t len) +static __always_inline void 
prefetch_nospec_range(const void *addr, size_t len) { -#ifdef ARCH_HAS_PREFETCHW s64 i, rem, prefetched; if (__builtin_constant_p(len) && len < 24 * PREFETCH_STRIDE) { if (len > 0) - prefetchw(addr); + prefetch_nospec(addr); if (len > PREFETCH_STRIDE) - prefetchw_offset(addr, PREFETCH_STRIDE); + prefetch_nospec_offset(addr, PREFETCH_STRIDE); if (len > 2 * PREFETCH_STRIDE) - prefetchw_offset(addr, 2 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 2 * PREFETCH_STRIDE); if (len > 3 * PREFETCH_STRIDE) - prefetchw_offset(addr, 3 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 3 * PREFETCH_STRIDE); if (len > 4 * PREFETCH_STRIDE) - prefetchw_offset(addr, 4 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 4 * PREFETCH_STRIDE); if (len > 5 * PREFETCH_STRIDE) - prefetchw_offset(addr, 5 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 5 * PREFETCH_STRIDE); if (len > 6 * PREFETCH_STRIDE) - prefetchw_offset(addr, 6 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 6 * PREFETCH_STRIDE); if (len > 7 * PREFETCH_STRIDE) - prefetchw_offset(addr, 7 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 7 * PREFETCH_STRIDE); if (len > 8 * PREFETCH_STRIDE) - prefetchw_offset(addr, 8 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 8 * PREFETCH_STRIDE); if (len > 9 * PREFETCH_STRIDE) - prefetchw_offset(addr, 9 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 9 * PREFETCH_STRIDE); if (len > 10 * PREFETCH_STRIDE) - prefetchw_offset(addr, 10 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 10 * PREFETCH_STRIDE); if (len > 11 * PREFETCH_STRIDE) - prefetchw_offset(addr, 11 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 11 * PREFETCH_STRIDE); if (len > 12 * PREFETCH_STRIDE) - prefetchw_offset(addr, 12 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 12 * PREFETCH_STRIDE); if (len > 13 * PREFETCH_STRIDE) - prefetchw_offset(addr, 13 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 13 * PREFETCH_STRIDE); if (len > 14 * PREFETCH_STRIDE) - prefetchw_offset(addr, 14 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 14 * PREFETCH_STRIDE); if (len > 15 * PREFETCH_STRIDE) - prefetchw_offset(addr, 15 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 15 * PREFETCH_STRIDE); if (len > 16 * PREFETCH_STRIDE) - prefetchw_offset(addr, 16 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 16 * PREFETCH_STRIDE); if (len > 17 * PREFETCH_STRIDE) - prefetchw_offset(addr, 17 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 17 * PREFETCH_STRIDE); if (len > 18 * PREFETCH_STRIDE) - prefetchw_offset(addr, 18 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 18 * PREFETCH_STRIDE); if (len > 19 * PREFETCH_STRIDE) - prefetchw_offset(addr, 19 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 19 * PREFETCH_STRIDE); if (len > 20 * PREFETCH_STRIDE) - prefetchw_offset(addr, 20 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 20 * PREFETCH_STRIDE); if (len > 21 * PREFETCH_STRIDE) - prefetchw_offset(addr, 21 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 21 * PREFETCH_STRIDE); if (len > 22 * PREFETCH_STRIDE) - prefetchw_offset(addr, 22 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 22 * PREFETCH_STRIDE); if (len > 23 * PREFETCH_STRIDE) - prefetchw_offset(addr, 23 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr, 23 * PREFETCH_STRIDE); return; } @@ -344,17 +345,16 @@ static __always_inline void prefetchw_range(const void *addr, size_t len) prefetched = len / (4 * PREFETCH_STRIDE); for (i = 0; i <= (s64) len - 256; i += 256) - E2K_PREFETCH_L2_256(addr + i); + E2K_PREFETCH_L2_NOSPEC_256(addr + i); if (rem > 0) - 
prefetchw(addr + prefetched); + prefetch_nospec(addr + prefetched); if (rem > PREFETCH_STRIDE) - prefetchw_offset(addr + prefetched, PREFETCH_STRIDE); + prefetch_nospec_offset(addr + prefetched, PREFETCH_STRIDE); if (rem > 2 * PREFETCH_STRIDE) - prefetchw_offset(addr + prefetched, 2 * PREFETCH_STRIDE); + prefetch_nospec_offset(addr + prefetched, 2 * PREFETCH_STRIDE); if (rem > 3 * PREFETCH_STRIDE) - prefetchw_offset(addr + prefetched, 3 * PREFETCH_STRIDE); -#endif + prefetch_nospec_offset(addr + prefetched, 3 * PREFETCH_STRIDE); } extern u64 cacheinfo_get_l1d_line_size(void); diff --git a/arch/e2k/include/asm/ptrace.h b/arch/e2k/include/asm/ptrace.h index a182329..38103ec 100644 --- a/arch/e2k/include/asm/ptrace.h +++ b/arch/e2k/include/asm/ptrace.h @@ -505,7 +505,7 @@ static inline void calculate_e2k_dstack_parameters( /* virtualization support */ #include -struct signal_stack_context { +typedef struct signal_stack_context { struct pt_regs regs; struct trap_pt_regs trap; struct k_sigaction sigact; @@ -515,7 +515,7 @@ struct signal_stack_context { #endif u64 sbbp[SBBP_ENTRIES_NUM]; struct pv_vcpu_ctxt vcpu_ctxt; -}; +} signal_stack_context_t; #define __signal_pt_regs_last(ti) \ ({ \ @@ -821,5 +821,8 @@ extern void syscall_trace_leave(struct pt_regs *regs); #define arch_has_single_step() (1) +extern long common_ptrace(struct task_struct *child, long request, unsigned long addr, + unsigned long data, bool compat); + #endif /* __ASSEMBLY__ */ #endif /* _E2K_PTRACE_H */ diff --git a/arch/e2k/include/asm/qspinlock.h b/arch/e2k/include/asm/qspinlock.h index 56f437f..9b11443 100644 --- a/arch/e2k/include/asm/qspinlock.h +++ b/arch/e2k/include/asm/qspinlock.h @@ -37,15 +37,14 @@ static inline void queued_spin_unlock(struct qspinlock *lock) static __always_inline void pv_wait(u8 *ptr, u8 val) { - if (cpu_has(CPU_FEAT_ISET_V6) && READ_CORE_MODE_REG().gmi && - READ_ONCE(*ptr) == val) + if (IS_HV_GM() && READ_ONCE(*ptr) == val) HYPERVISOR_pv_wait(); } static __always_inline void pv_kick(int cpu) { - if (cpu_has(CPU_FEAT_ISET_V6) && READ_CORE_MODE_REG().gmi) + if (IS_HV_GM()) HYPERVISOR_pv_kick(cpu); } @@ -68,7 +67,7 @@ extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - if (cpu_has(CPU_FEAT_ISET_V6) && READ_CORE_MODE_REG().gmi) + if (IS_HV_GM()) __pv_queued_spin_lock_slowpath(lock, val); else native_queued_spin_lock_slowpath(lock, val); @@ -79,18 +78,12 @@ extern void __pv_queued_spin_unlock(struct qspinlock *lock); # define queued_spin_unlock queued_spin_unlock static inline void queued_spin_unlock(struct qspinlock *lock) { - if (cpu_has(CPU_FEAT_ISET_V6) && READ_CORE_MODE_REG().gmi) + if (IS_HV_GM()) __pv_queued_spin_unlock(lock); else native_queued_spin_unlock(lock); } -# define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(long cpu) -{ - return false; -} - #endif /* !CONFIG_PARAVIRT_SPINLOCKS */ #include diff --git a/arch/e2k/include/asm/sections.h b/arch/e2k/include/asm/sections.h index 05917ef..f684136 100644 --- a/arch/e2k/include/asm/sections.h +++ b/arch/e2k/include/asm/sections.h @@ -40,6 +40,7 @@ extern char __common_data_begin[], __common_data_end[]; extern char _edata_bss[]; extern char _t_entry[], _t_entry_end[]; extern char __entry_handlers_start[], __entry_handlers_end[]; +extern char __entry_handlers_hcalls_start[], __entry_handlers_hcalls_end[]; extern char 
__start_ro_after_init[], __end_ro_after_init[]; #endif /* ! __ASSEMBLY__ */ diff --git a/arch/e2k/include/asm/sic_regs.h b/arch/e2k/include/asm/sic_regs.h index 89e0519..ddb92ea 100644 --- a/arch/e2k/include/asm/sic_regs.h +++ b/arch/e2k/include/asm/sic_regs.h @@ -195,34 +195,21 @@ #define SIC_MC_BASE 0x400 #define SIC_MC_SIZE (machine.sic_mc_size) -#define SIC_mc_ecc 0x440 #define SIC_mc0_ecc 0x400 #define SIC_mc1_ecc (machine.sic_mc1_ecc) #define SIC_mc2_ecc 0x480 #define SIC_mc3_ecc 0x4c0 -#define SIC_mc_ch 0x400 -#define SIC_mc_status 0x44c - -#define SIC_mc_opmb 0x424 #define SIC_mc0_opmb 0x414 #define SIC_mc1_opmb 0x454 #define SIC_mc2_opmb 0x494 #define SIC_mc3_opmb 0x4d4 -#define SIC_mc_cfg 0x418 #define SIC_mc0_cfg 0x418 #define SIC_mc1_cfg 0x458 #define SIC_mc2_cfg 0x498 #define SIC_mc3_cfg 0x4d8 -/* HMU */ -#define SIC_hmu_mic 0xd00 -#define SIC_hmu0_int 0xd40 -#define SIC_hmu1_int 0xd70 -#define SIC_hmu2_int 0xda0 -#define SIC_hmu3_int 0xdd0 - /* IPCC */ #define SIC_IPCC_LINKS_COUNT 3 #define SIC_ipcc_csr1 0x604 @@ -330,6 +317,54 @@ typedef union { u32 word; } sys_mon_1_t; +/* E8C2 Power Control System (PCS) registers */ + +#define SIC_pcs_ctrl0 0x0cb0 +#define SIC_pcs_ctrl1 0x0cb4 +#define SIC_pcs_ctrl2 0x0cb8 +#define SIC_pcs_ctrl3 0x0cbc +#define SIC_pcs_ctrl4 0x0cc0 +#define SIC_pcs_ctrl5 0x0cc4 +#define SIC_pcs_ctrl6 0x0cc8 +#define SIC_pcs_ctrl7 0x0ccc +#define SIC_pcs_ctrl8 0x0cd0 +#define SIC_pcs_ctrl9 0x0cd4 + +/* PCS_CTRL1 fields: */ +typedef union { + struct { + u32 pcs_mode : 4; + u32 n_fprogr : 6; + u32 n_fmin : 6; + u32 n_fminmc : 6; + u32 n : 6; + u32 : 4; + }; + u32 word; +} pcs_ctrl1_t; + +/* PCS_CTRL3 fields: */ +typedef union { + struct { + u32 n_fpin : 6; + u32 : 2; + u32 bfs_freq : 4; + u32 pll_bw : 3; + u32 : 1; + u32 pll_mode : 3; + u32 : 1; + u32 iol_bitrate : 3; + u32 : 1; + u32 ipl_bitrate : 3; + u32 : 1; + u32 l_equaliz : 1; + u32 l_preemph : 1; + u32 bfs_adj_dsbl : 1; + u32 : 1; + }; + u32 word; +} pcs_ctrl3_t; + /* Cache L3 */ #define SIC_l3_ctrl 0x3000 #define SIC_l3_serv 0x3004 diff --git a/arch/e2k/include/asm/signal.h b/arch/e2k/include/asm/signal.h index 2cd7749..919b628 100644 --- a/arch/e2k/include/asm/signal.h +++ b/arch/e2k/include/asm/signal.h @@ -125,7 +125,12 @@ do { \ } while (0) struct signal_stack; +extern unsigned long allocate_signal_stack(unsigned long size); extern void free_signal_stack(struct signal_stack *signal_stack); +extern struct signal_stack_context __user * + get_the_signal_stack(struct signal_stack *signal_stack); +extern struct signal_stack_context __user * + pop_the_signal_stack(struct signal_stack *signal_stack); extern struct signal_stack_context __user *pop_signal_stack(void); extern struct signal_stack_context __user *get_signal_stack(void); extern int setup_signal_stack(struct pt_regs *regs, bool is_signal); @@ -152,6 +157,9 @@ extern int prepare_sighandler_frame(struct e2k_stacks *stacks, extern int native_signal_setup(struct pt_regs *regs); +extern int native_longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, + struct pt_regs *new_regs); + static inline int native_complete_long_jump(struct pt_regs *regs) { /* nithing to do for native kernel & host */ @@ -199,6 +207,12 @@ static inline int signal_setup(struct pt_regs *regs) return native_signal_setup(regs); } +static inline int longjmp_copy_user_to_kernel_hw_stacks(struct pt_regs *regs, + struct pt_regs *new_regs) +{ + return native_longjmp_copy_user_to_kernel_hw_stacks(regs, new_regs); +} + static inline int complete_long_jump(struct pt_regs *regs) { 
return native_complete_long_jump(regs); diff --git a/arch/e2k/include/asm/stacks.h b/arch/e2k/include/asm/stacks.h index 6a5f49e..0f1a4cc 100644 --- a/arch/e2k/include/asm/stacks.h +++ b/arch/e2k/include/asm/stacks.h @@ -114,24 +114,11 @@ typedef struct old_pcs_area { #define USER_P_STACK_INIT_SIZE (4 * PAGE_SIZE) #define USER_PC_STACK_INIT_SIZE PAGE_SIZE -/* - * Software user stack for 64-bit mode. - */ -#define USER64_C_STACK_BYTE_INCR (4 * PAGE_SIZE) /* 4 pages */ -#define USER64_STACK_TOP (USER_PC_STACKS_BASE) - -/* - * Software user stack for 32-bit mode. - */ -#define USER32_C_STACK_BYTE_INCR (4 * PAGE_SIZE) /* 4 pages */ -#define USER32_STACK_TOP (TASK32_SIZE) - -/* - * These macro definitions are to unify 32- and 64-bit user stack - * handling procedures. - */ -#define USER_C_STACK_BYTE_INCR (current->thread.flags & E2K_FLAG_32BIT ? \ - USER32_C_STACK_BYTE_INCR : USER64_C_STACK_BYTE_INCR) +#define USER_C_STACK_BYTE_INCR (4 * PAGE_SIZE) +/* Software user stack for 64-bit mode. */ +#define USER64_STACK_TOP USER_PC_STACKS_BASE +/* Software user stack for 32-bit mode. */ +#define USER32_STACK_TOP TASK32_SIZE /* * This macro definition is to limit deafault user stack size diff --git a/arch/e2k/include/asm/string.h b/arch/e2k/include/asm/string.h index 4bf4440..411c408 100644 --- a/arch/e2k/include/asm/string.h +++ b/arch/e2k/include/asm/string.h @@ -3,6 +3,7 @@ #include +#include #include #define __HAVE_ARCH_STRNLEN @@ -58,8 +59,8 @@ static inline int _memcmp(const void *s1, const void *s2, size_t n) } } - E2K_PREFETCH_L2(s1); - E2K_PREFETCH_L2(s2); + E2K_PREFETCH_L1_SPEC(s1); + E2K_PREFETCH_L1_SPEC(s2); return __memcmp(s1, s2, n); } @@ -421,7 +422,7 @@ static inline void native_tagged_memcpy_8(void *__restrict dst, else E2K_TAGGED_MEMMOVE_8(dst, src); } else { - E2K_PREFETCH_L2(src); + E2K_PREFETCH_L2_SPEC(src); __tagged_memcpy_8(dst, src, n); } @@ -490,11 +491,25 @@ fast_tagged_memory_copy(void *dst, const void *src, size_t len, strd_opcode, ldrd_opcode, prefetch); } static inline unsigned long +fast_tagged_memory_copy_user(void *dst, const void *src, size_t len, size_t *copied, + unsigned long strd_opcode, unsigned long ldrd_opcode, + int prefetch) +{ + return native_fast_tagged_memory_copy(dst, src, len, + strd_opcode, ldrd_opcode, prefetch); +} +static inline unsigned long fast_tagged_memory_set(void *addr, u64 val, u64 tag, size_t len, u64 strd_opcode) { return native_fast_tagged_memory_set(addr, val, tag, len, strd_opcode); } +static inline unsigned long +fast_tagged_memory_set_user(void *addr, u64 val, u64 tag, + size_t len, size_t *cleared, u64 strd_opcode) +{ + return native_fast_tagged_memory_set(addr, val, tag, len, strd_opcode); +} static inline unsigned long boot_fast_tagged_memory_copy(void *dst, const void *src, size_t len, @@ -520,7 +535,7 @@ extract_tags_32(u16 *dst, const void *src) /* it is native kernel without virtualization support */ static inline int fast_tagged_memory_copy_to_user(void __user *dst, const void *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { @@ -530,7 +545,7 @@ fast_tagged_memory_copy_to_user(void __user *dst, const void *src, static inline int fast_tagged_memory_copy_from_user(void *dst, const void __user *src, - size_t len, const struct pt_regs *regs, + size_t len, size_t *copied, const struct pt_regs *regs, unsigned long strd_opcode, unsigned long ldrd_opcode, int prefetch) { diff --git
a/arch/e2k/include/asm/switch_to.h b/arch/e2k/include/asm/switch_to.h index 989395f..c8111b8 100644 --- a/arch/e2k/include/asm/switch_to.h +++ b/arch/e2k/include/asm/switch_to.h @@ -55,7 +55,7 @@ do { \ #define prepare_arch_switch(next) \ do { \ - prefetchw_range(&next->thread.sw_regs, \ + prefetch_nospec_range(&next->thread.sw_regs, \ offsetof(struct sw_regs, cs_lo)); \ /* It works under CONFIG_MCST_RT */ \ SAVE_CURR_TIME_SWITCH_TO; \ diff --git a/arch/e2k/include/asm/sync_pg_tables.h b/arch/e2k/include/asm/sync_pg_tables.h index 08dfc51..bf49cc2 100644 --- a/arch/e2k/include/asm/sync_pg_tables.h +++ b/arch/e2k/include/asm/sync_pg_tables.h @@ -6,13 +6,24 @@ #ifndef _E2K_SYNC_PG_TABLES_H #define _E2K_SYNC_PG_TABLES_H -#if defined(CONFIG_KVM_GUEST_KERNEL) - +#ifdef CONFIG_KVM_GUEST_KERNEL +/* it is native guest kernel (not paravirtualized based on pv_ops) */ #include - -#define sync_addr_range kvm_sync_addr_range -#else -#define sync_addr_range -#endif /* !CONFIG_KVM_GUEST_KERNEL */ +#elif defined(CONFIG_PARAVIRT_GUEST) +/* it is paravirtualized host and guest kernel */ +#include +#else /* !CONFIG_KVM_GUEST_KERNEL && !CONFIG_PARAVIRT_GUEST */ +/* it is native kernel without any virtualization */ +/* or host kernel with virtualization support */ +#define sync_mm_addr(address) \ +do { \ + (void) (address); \ +} while (0) +#define sync_mm_range(start, end) \ +do { \ + (void) (start); \ + (void) (end); \ +} while (0) +#endif /* CONFIG_KVM_GUEST_KERNEL */ #endif diff --git a/arch/e2k/include/asm/syscalls.h b/arch/e2k/include/asm/syscalls.h index 035accf..3cfa53e 100644 --- a/arch/e2k/include/asm/syscalls.h +++ b/arch/e2k/include/asm/syscalls.h @@ -125,6 +125,13 @@ extern long protected_sys_getgroups(const long a1, /* size */ const unsigned long unused5, const unsigned long unused6, const struct pt_regs *regs); +extern long protected_sys_setgroups(const long a1, /* size */ + const unsigned long __user a2, /* list[] */ + const unsigned long unused3, + const unsigned long unused4, + const unsigned long unused5, + const unsigned long unused6, + const struct pt_regs *regs); extern long protected_sys_ipc(const unsigned long call, /* a1 */ const long first, /* a2 */ const unsigned long second, /* a3 */ @@ -335,6 +342,13 @@ extern long protected_sys_ioctl(const int fd, /* a1 */ const unsigned long unused5, const unsigned long unused6, const struct pt_regs *regs); +extern long protected_sys_bpf(const int cmd, /* a1 */ + const unsigned long __user attr, /* a2 */ + const unsigned int size, /* a3 */ + const unsigned long unused4, + const unsigned long unused5, + const unsigned long unused6, + const struct pt_regs *regs); extern long protected_sys_epoll_ctl(const unsigned long epfd, /* a1 */ const unsigned long op, /* a2 */ const unsigned long fd, /* a3 */ diff --git a/arch/e2k/include/asm/thread_info.h b/arch/e2k/include/asm/thread_info.h index 81d2c7a..7ba4cd5 100644 --- a/arch/e2k/include/asm/thread_info.h +++ b/arch/e2k/include/asm/thread_info.h @@ -183,13 +183,6 @@ typedef struct thread_info { /* to support spin lock/unlock */ struct gthread_info *gti_to_spin; /* guest thread waitin for the */ /* spin lock/unlock */ - int should_stop; /* on host: guest kernel thread */ - /* should be stopped */ - /* structure to save state of user global registers, which are */ - /* used to support virtualization and PV OPs by kernel */ - host_gregs_t h_gregs; /* state of user global registers */ - /* used by host to support guest */ - /* kernel */ #endif /* CONFIG_VIRTUALIZATION */ } __aligned(SMP_CACHE_BYTES) 
thread_info_t; diff --git a/arch/e2k/include/asm/timex.h b/arch/e2k/include/asm/timex.h index 24fa302..a43f2f9 100644 --- a/arch/e2k/include/asm/timex.h +++ b/arch/e2k/include/asm/timex.h @@ -8,9 +8,6 @@ #include -/* Note that this is lt_timer tick rate. */ -#define CLOCK_TICK_RATE (machine.clock_tick_rate) - typedef unsigned long cycles_t; #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/e2k/include/asm/tlb-context.h b/arch/e2k/include/asm/tlb-context.h new file mode 100644 index 0000000..a5906ba --- /dev/null +++ b/arch/e2k/include/asm/tlb-context.h @@ -0,0 +1,791 @@ +/* + * TLB context support & flushing + */ + +#ifndef _E2K_TLB_CONTEXT_H_ +#define _E2K_TLB_CONTEXT_H_ + +#include +#include +#include + +#include +#include +#include + +#undef DEBUG_PT_MODE +#undef DebugPT +#define DEBUG_PT_MODE 0 /* Data Caches */ +#define DebugPT(...) DebugPrint(DEBUG_PT_MODE, ##__VA_ARGS__) + +/* + * TLB flushing: + */ + +/* + * Flush all processes TLBs of the processor + */ +static inline void +mmu_pid_flush_tlb_all(bool trace_enabled) +{ + flush_TLB_all(); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_all(raw_smp_processor_id()); + } +} + +/* + * Flush a specified user mapping on the processor + */ +static inline void +mmu_pid_flush_tlb_mm(mm_context_t *context, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int cpu; + unsigned long old_pid, pid; + + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + + cpu = raw_smp_processor_id(); + old_pid = context->cpumsk[cpu]; + + if (likely(is_active)) { + unsigned long pid, flags; + + /* Should update right now */ + DebugPT("mm context will be reloaded\n"); + raw_all_irq_save(flags); + cpu = smp_processor_id(); + pid = get_new_mmu_pid(context, cpu); + reload_context_mask(pid); + raw_all_irq_restore(flags); + + DebugPT("CPU #%d new mm context is 0x%lx\n", + cpu, context->cpumsk[cpu]); + } else { +#ifdef CONFIG_SMP + /* Remove this cpu from mm_cpumask. This might be + * needed, for example, after sys_io_setup() if the + * kernel thread which was using this mm received + * flush ipi (unuse_mm() does not clear mm_cpumask). + * And maybe there are other such places where + * a kernel thread uses user mm. */ + if (likely(mm_cpumask != NULL)) { + cpumask_clear_cpu(cpu, mm_cpumask); + } +#endif + context->cpumsk[cpu] = 0; + pid = 0; + } + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_mm(cpu, context, is_active, old_pid, pid); + } +} + +/* + * Flush just one specified address of current process. + */ +static inline void +mmu_pid_flush_tlb_address(mm_context_t *context, + e2k_addr_t addr, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int cpu = raw_smp_processor_id(); + unsigned long old_pid, pid; + + old_pid = context->cpumsk[cpu]; + + if (unlikely(old_pid == 0)) { + /* See comment in __flush_tlb_range(). */ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = context->cpumsk[cpu]; + } else { + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); + pid = old_pid; + flush_TLB_page(addr, CTX_HARDWARE(pid)); + } + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_address(cpu, context, addr, old_pid, pid); + } +} + +/* + * Flush the TLB entries mapping the virtually mapped linear page + * table corresponding to specified address of current process. 
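+ * In practice this means dropping the DTLB entries that cache the page table pages themselves: the body below flushes the PTE, PMD and PUD levels through their pte_virt_offset()/pmd_virt_offset()/pud_virt_offset() self-mapping addresses for the given address.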
+ */ +static inline void +mmu_pid_flush_tlb_address_pgtables(mm_context_t *context, + e2k_addr_t addr, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int cpu = raw_smp_processor_id(); + unsigned long old_pid, pid; + + old_pid = context->cpumsk[cpu]; + + if (unlikely(old_pid == 0)) { + /* See comment in __flush_tlb_range(). */ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = context->cpumsk[cpu]; + } else { + pid = old_pid; + flush_TLB_page_begin(); + /* flush virtual mapping of PTE entry (third level) */ + __flush_TLB_page(pte_virt_offset(_PAGE_ALIGN_UP(addr, + PTE_SIZE)), + CTX_HARDWARE(pid)); + /* flush virtual mapping of PMD entry (second level) */ + __flush_TLB_page(pmd_virt_offset(_PAGE_ALIGN_UP(addr, + PMD_SIZE)), + CTX_HARDWARE(pid)); + /* flush virtual mapping of PUD entry (first level) */ + __flush_TLB_page(pud_virt_offset(_PAGE_ALIGN_UP(addr, + PUD_SIZE)), + CTX_HARDWARE(pid)); + flush_TLB_page_end(); + } + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_address(cpu, context, addr, old_pid, pid); + trace_mmu_pid_flush_tlb_address(cpu, context, + pte_virt_offset(_PAGE_ALIGN_UP(addr, PTE_SIZE)), + old_pid, pid); + trace_mmu_pid_flush_tlb_address(cpu, context, + pmd_virt_offset(_PAGE_ALIGN_UP(addr, PMD_SIZE)), + old_pid, pid); + trace_mmu_pid_flush_tlb_address(cpu, context, + pud_virt_offset(_PAGE_ALIGN_UP(addr, PUD_SIZE)), + old_pid, pid); + } +} + +/* + * Flush just one page of a specified user. + */ +static inline void +mmu_pid_flush_tlb_page(mm_context_t *context, + e2k_addr_t addr, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int cpu = raw_smp_processor_id(); + unsigned long old_pid, pid; + + old_pid = context->cpumsk[cpu]; + + if (unlikely(old_pid == 0)) { + /* See comment in __flush_tlb_range(). */ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = context->cpumsk[cpu]; + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_address(cpu, context, addr, + old_pid, pid); + } + return; + } + + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); + + pid = old_pid; + flush_TLB_page_begin(); + __flush_TLB_page(addr, CTX_HARDWARE(pid)); + /* flush virtual mapping of PTE entry (third level) */ + __flush_TLB_page(pte_virt_offset(addr), CTX_HARDWARE(pid)); + flush_TLB_page_end(); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_address(cpu, context, addr, old_pid, pid); + trace_mmu_pid_flush_tlb_address(cpu, context, + pte_virt_offset(_PAGE_ALIGN_UP(addr, PTE_SIZE)), + old_pid, pid); + } +} + +/* + * Flush a specified range of pages + */ + +/* If the number of pages to be flushed is below this value, + * then only those pages will be flushed. + * + * Flushing one page takes ~150 cycles, flushing the whole mm + * takes ~400 cycles. Also note that __flush_tlb_range() may + * be called repeatedly for the same process so high values + * are bad. 
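+ * For example, with the limit set to 8 pages the per-page path costs at most about 8 * 150 = 1200 cycles, whereas falling back to a whole-mm flush is ~400 cycles up front but discards every cached translation of the process, so repeated range flushes would keep emptying its TLB.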
*/ +#define FLUSH_TLB_RANGE_MAX_PAGES 8 + +static inline void +mmu_pid_flush_tlb_range(mm_context_t *context, + const e2k_addr_t start, const e2k_addr_t end, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + const long pages_num = (PAGE_ALIGN_DOWN(end) - PAGE_ALIGN_UP(start)) + / PAGE_SIZE; + int cpu = raw_smp_processor_id(); + unsigned long old_pid, pid; + + BUG_ON(start > end); + + DebugPT("range start 0x%lx end 0x%lx context 0x%lx PID 0x%lx CPU #%d\n", + PAGE_ALIGN_UP(start), PAGE_ALIGN_DOWN(end), + CTX_HARDWARE(context->cpumsk[cpu]), + context->cpumsk[cpu], cpu); + + old_pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (pages_num <= FLUSH_TLB_RANGE_MAX_PAGES) { + unsigned long page, pmd_start, pmd_end; + + if (unlikely(old_pid == 0)) { + /* We were trying to flush a range of pages, + * but someone is flushing the whole mm. + * Now we cannot flush pages (we do not know + * the context) so we have to flush the whole mm. + * + * Even if we will receive the flush ipi we will + * just end up flushing mm twice - which is OK + * considering how rare this case is. */ + goto flush_mm; + } + + count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, pages_num); + + pid = old_pid; + flush_TLB_page_begin(); + for (page = PAGE_ALIGN_UP(start); page < end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + /* + * flush virtual mapping of PTE entry (third level) + * + * Needed because Linux assumes that flush_tlb_*() + * interfaces flush both pte and pmd levels (this + * may be changed in future versions, in which case + * this flush can be removed). + */ + pmd_start = pte_virt_offset(round_down(start, PMD_SIZE)); + pmd_end = pte_virt_offset(round_up(end, PMD_SIZE)); + for (page = round_down(pmd_start, PAGE_SIZE); + page < pmd_end; page += PAGE_SIZE) + __flush_TLB_page(page, pid); + flush_TLB_page_end(); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + trace_mmu_pid_flush_tlb_range(cpu, context, + pmd_start, pmd_end, old_pid, pid); + } + } else { +flush_mm: + /* Too many pages to flush. + * It is faster to change the context instead. + * If mm != current->active_mm then setting this + * CPU's mm context to 0 will do the trick, + * otherwise we duly increment it. */ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + } + } +} + +static inline void +mmu_pid_flush_pmd_tlb_range(mm_context_t *context, + unsigned long start, unsigned long end, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + long pages_num; + int cpu = raw_smp_processor_id(); + unsigned long old_pid, pid; + + BUG_ON(start > end); + + end = round_up(end, PMD_SIZE); + start = round_down(start, PMD_SIZE); + + pages_num = (end - start) / PMD_SIZE; + + old_pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (pages_num <= FLUSH_TLB_RANGE_MAX_PAGES) { + unsigned long pmd_start, pmd_end; + e2k_addr_t page; + + if (unlikely(old_pid == 0)) { + /* We were trying to flush a range of pages, + * but someone is flushing the whole mm. + * Now we cannot flush pages (we do not know + * the context) so we have to flush the whole mm. + * + * Even if we will receive the flush ipi we will + * just end up flushing mm twice - which is OK + * considering how rare this case is. 
*/ + goto flush_mm; + } + + count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, + pages_num * (PMD_SIZE / PTE_SIZE)); + + pid = old_pid; + flush_TLB_page_begin(); + for (page = start; page < end; page += PMD_SIZE) + __flush_TLB_page(page, pid); + /* + * flush virtual mapping of PTE entry (third level). + * + * When flushing high order page table entries, + * we must also flush all links below it. E.g. when + * flushing PMD, also flush PMD->PTE link (i.e. DTLB + * entry for address 0xff8000000000|(address >> 9)). + * + * Otherwise the following can happen: + * 1) High-order page is allocated. + * 2) Someone accesses the PMD->PTE link (e.g. half-spec. load) + * and creates invalid entry in DTLB. + * 3) High-order page is split into 4 Kb pages. + * 4) Someone accesses the PMD->PTE link address (e.g. DTLB + * entry probe) and reads the invalid entry created earlier. + */ + pmd_start = pte_virt_offset(round_down(start, PMD_SIZE)); + pmd_end = pte_virt_offset(round_up(end, PMD_SIZE)); + for (page = round_down(pmd_start, PAGE_SIZE); + page < pmd_end; page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + trace_mmu_pid_flush_tlb_range(cpu, context, + pmd_start, pmd_end, old_pid, pid); + } + flush_TLB_page_end(); + } else { +flush_mm: + /* Too many pages to flush. + * It is faster to change the context instead. + * If mm != current->active_mm then setting this + * CPU's mm context to 0 will do the trick, + * otherwise we duly increment it. */ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + } + } +} + +/* + * Flush the TLB entries mapping the virtually mapped linear page + * table corresponding to address range [start : end]. + */ +static inline void +mmu_pid_flush_tlb_pgtables(mm_context_t *context, + e2k_addr_t start, e2k_addr_t end, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + const long pages_num = (PAGE_ALIGN_DOWN(end) - PAGE_ALIGN_UP(start)) + / PAGE_SIZE; + int cpu = raw_smp_processor_id(); + unsigned long old_pid, pid; + + BUG_ON(start > end); + + DebugPT("range start 0x%lx end 0x%lx context 0x%lx pid 0x%lx CPU #%d\n", + PAGE_ALIGN_UP(start), PAGE_ALIGN_DOWN(end), + CTX_HARDWARE(context->cpumsk[cpu]), + context->cpumsk[cpu], cpu); + + old_pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (pages_num <= FLUSH_TLB_RANGE_MAX_PAGES) { + e2k_addr_t page; + unsigned long range_begin, range_end; + + if (unlikely(old_pid == 0)) { + /* We were trying to flush a range of pages, + * but someone is flushing the whole mm. + * Now we cannot flush pages (we do not know + * the context) so we have to flush the whole mm. + * + * Even if we will receive the flush ipi we will + * just end up flushing mm twice - which is OK + * considering how rare this case is. 
*/ + goto flush_mm; + } + + pid = old_pid; + flush_TLB_page_begin(); + + /* flush virtual mapping of PTE entries (third level) */ + range_begin = pte_virt_offset(_PAGE_ALIGN_UP(start, PTE_SIZE)); + range_end = pte_virt_offset(_PAGE_ALIGN_DOWN(end, PTE_SIZE)); + for (page = PAGE_ALIGN_UP(range_begin); page < range_end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + range_begin, range_end, old_pid, pid); + } + + /* flush virtual mapping of PMD entries (second level) */ + range_begin = pmd_virt_offset(_PAGE_ALIGN_UP(start, PMD_SIZE)); + range_end = pmd_virt_offset(_PAGE_ALIGN_DOWN(end, PMD_SIZE)); + for (page = PAGE_ALIGN_UP(range_begin); page < range_end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + range_begin, range_end, old_pid, pid); + } + + /* flush virtual mapping of PUD entries (first level) */ + range_begin = pud_virt_offset(_PAGE_ALIGN_UP(start, PUD_SIZE)); + range_end = pud_virt_offset(_PAGE_ALIGN_DOWN(end, PUD_SIZE)); + for (page = PAGE_ALIGN_UP(range_begin); page < range_end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + range_begin, range_end, old_pid, pid); + } + + flush_TLB_page_end(); + } else { +flush_mm: + /* Too many pages to flush. + * It is faster to change the context instead. + * If mm != current->active_mm then setting this + * CPU's mm context to 0 will do the trick, + * otherwise we duly increment it. */ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + } + } +} + +/* + * Flush a specified range of pages and the TLB entries mapping the virtually + * mapped linear page table corresponding to address range [start : end]. + */ +static inline void +mmu_pid_flush_tlb_range_and_pgtables(mm_context_t *context, + e2k_addr_t start, e2k_addr_t end, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + mmu_pid_flush_tlb_range(context, start, end, is_active, mm_cpumask, + trace_enabled); + mmu_pid_flush_tlb_pgtables(context, start, end, is_active, mm_cpumask, + trace_enabled); +} + +static inline void +mmu_pid_flush_tlb_page_and_pgtables(mm_context_t *context, + unsigned long address, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + unsigned long page; + unsigned long start = address, end = address + E2K_MAX_FORMAT; + unsigned long range_begin, range_end; + unsigned long pid = context->cpumsk[raw_smp_processor_id()]; + int cpu = raw_smp_processor_id(); + unsigned long old_pid, new_pid; + + old_pid = context->cpumsk[cpu]; + if (unlikely(old_pid == 0)) { + /* See comment in __flush_tlb_range(). 
*/ + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + pid = CTX_HARDWARE(context->cpumsk[cpu]); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + } + return; + } + + new_pid = old_pid; + pid = CTX_HARDWARE(new_pid); + + flush_TLB_page_begin(); + + /* flush virtual mapping of PUD entries (first level) */ + range_begin = pud_virt_offset(_PAGE_ALIGN_UP(start, PUD_SIZE)); + range_end = pud_virt_offset(_PAGE_ALIGN_DOWN(end, PUD_SIZE)); + for (page = PAGE_ALIGN_UP(range_begin); page < range_end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + range_begin, range_end, old_pid, new_pid); + } + + /* flush virtual mapping of PMD entries (second level) */ + range_begin = pmd_virt_offset(_PAGE_ALIGN_UP(start, PMD_SIZE)); + range_end = pmd_virt_offset(_PAGE_ALIGN_DOWN(end, PMD_SIZE)); + for (page = PAGE_ALIGN_UP(range_begin); page < range_end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + range_begin, range_end, old_pid, new_pid); + } + + /* flush virtual mapping of PTE entries (third level) */ + range_begin = pte_virt_offset(_PAGE_ALIGN_UP(start, PTE_SIZE)); + range_end = pte_virt_offset(_PAGE_ALIGN_DOWN(end, PTE_SIZE)); + for (page = PAGE_ALIGN_UP(range_begin); page < range_end; + page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + range_begin, range_end, old_pid, new_pid); + } + + for (page = PAGE_ALIGN_UP(start); page < end; page += PAGE_SIZE) + __flush_TLB_page(page, pid); + if (unlikely(trace_enabled)) { + trace_mmu_pid_flush_tlb_range(cpu, context, + start, end, old_pid, pid); + } + + flush_TLB_page_end(); +} + +#ifdef CONFIG_SMP + +/* + * Flush a specified user mapping + */ + +static inline void +mmu_pid_smp_flush_tlb_mm(mm_context_t *context, + void (*flush_ipi_func)(void *data), void *flush_ipi_data, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + preempt_disable(); + + /* Signal to all users of this mm that it has been flushed. + * Invalid context will be updated while activating or switching to. */ + memset(context->cpumsk, 0, nr_cpu_ids * sizeof(context->cpumsk[0])); + + /* See comment about memory barriers in do_switch_mm(). */ + smp_mb(); + + mmu_pid_flush_tlb_mm(context, is_active, mm_cpumask, trace_enabled); + + /* Check that mm_cpumask() has some other CPU set */ + if (cpumask_any_but(mm_cpumask, smp_processor_id()) < nr_cpu_ids) { + /* Send flush ipi to all other cpus in mm_cpumask(). */ + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + smp_call_function_many(mm_cpumask, flush_ipi_func, + flush_ipi_data, 1); + } + + preempt_enable(); +} + +/* + * Flush a single page from TLB + */ + +static inline void +mmu_pid_smp_flush_tlb_page(mm_context_t *context, const e2k_addr_t addr, + void (*flush_ipi_func)(void *data), void *flush_ipi_data, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int i, cpu; + + preempt_disable(); + + cpu = smp_processor_id(); + + /* See comment in smp_flush_tlb_range() */ + for (i = 0; i < nr_cpu_ids; i++) { + if (i == cpu) + continue; + context->cpumsk[i] = 0; + } + + mmu_pid_flush_tlb_page(context, addr, is_active, mm_cpumask, + trace_enabled); + + /* See comment about memory barriers in do_switch_mm(). 
*/ + smp_mb(); + + /* Check that mm_cpumask() has some other CPU set */ + if (cpumask_any_but(mm_cpumask, cpu) < nr_cpu_ids) { + /* Send flush ipi to all other cpus in mm_cpumask(). */ + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + smp_call_function_many(mm_cpumask, flush_ipi_func, + flush_ipi_data, 1); + } + + preempt_enable(); +} + +/* + * Flush a range of pages + */ + +static inline void +mmu_pid_smp_flush_tlb_range(mm_context_t *context, + const e2k_addr_t start, const e2k_addr_t end, + void (*flush_ipi_func)(void *data), void *flush_ipi_data, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int cpu, i; + + preempt_disable(); + + cpu = smp_processor_id(); + + /* Signal to all users of this mm that it has been flushed. + * Invalid context will be updated while activating or switching to. + * + * Things to consider: + * + * 1) Clearing the whole context for CPUs to which we send the flush + * ipi looks unnecessary, but is needed to avoid race conditions. The + * problem is that there is a window between reading mm_cpumask() and + * deciding which context should be set to 0. In that window situation + * could have changed, so the only safe way is to set mm context on + * ALL cpus to 0. + * + * 2) Setting it to 0 essentially means that the cpus which receive the + * flush ipis cannot flush only a range of pages because they do not + * know the context, so they will flush the whole mm. + * + * 3) TODO FIXME This way of doing things is OK for 2 CPUs, for 4 CPUs, + * but it may become a problem for e2s with its 64 CPUs if there is a + * really-multi-threaded application running. If this is the case it + * would be better to implement scheme which will remember pending TLB + * flush requests. But such a scheme will greatly increase struct mm + * size (64 * 4 * 32 = 8 Kb for 64-processors system with a maximum + * of 4 simultaneously pending flushes each taking up 32 bytes). + * + * This problem (3) only gets worse when we are making all pages valid + * since EVERY mmap/sys_brk and some other calls will end up sending + * 63 flush ipis which will flush all the TLBs. + */ + for (i = 0; i < nr_cpu_ids; i++) { + if (i == cpu) + /* That being said, current CPU can still + * flush only the given range of pages. */ + continue; + context->cpumsk[i] = 0; + } + + mmu_pid_flush_tlb_range(context, start, end, is_active, mm_cpumask, + trace_enabled); + + /* See comment about memory barriers in do_switch_mm(). */ + smp_mb(); + + /* Check that mm_cpumask() has some other CPU set */ + if (cpumask_any_but(mm_cpumask, cpu) < nr_cpu_ids) { + /* Send flush ipi to all other cpus in mm_cpumask(). */ + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + smp_call_function_many(mm_cpumask, flush_ipi_func, + flush_ipi_data, 1); + } + + preempt_enable(); +} + +/* + * As native_smp_flush_tlb_range() but for pmd's + */ +static inline void +mmu_pid_smp_flush_pmd_tlb_range(mm_context_t *context, + const e2k_addr_t start, const e2k_addr_t end, + void (*flush_ipi_func)(void *data), void *flush_ipi_data, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) + +{ + int cpu, i; + + preempt_disable(); + + cpu = smp_processor_id(); + + /* See comment in smp_flush_tlb_range() */ + for (i = 0; i < nr_cpu_ids; i++) { + if (i == cpu) + /* That being said, current CPU can still + * flush only the given range of pages. */ + continue; + context->cpumsk[i] = 0; + } + + mmu_pid_flush_pmd_tlb_range(context, start, end, is_active, mm_cpumask, + trace_enabled); + + /* See comment about memory barriers in do_switch_mm(). 
*/ + smp_mb(); + + /* Check that mm_cpumask() has some other CPU set */ + if (cpumask_any_but(mm_cpumask, cpu) < nr_cpu_ids) { + /* Send flush ipi to all other cpus in mm_cpumask(). */ + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + smp_call_function_many(mm_cpumask, flush_ipi_func, + flush_ipi_data, 1); + } + + preempt_enable(); +} + +/* + * Flush a range of pages and page tables. + */ + +static inline void +mmu_pid_smp_flush_tlb_range_and_pgtables(mm_context_t *context, + const e2k_addr_t start, const e2k_addr_t end, + void (*flush_ipi_func)(void *data), void *flush_ipi_data, + bool is_active, cpumask_t *mm_cpumask, + bool trace_enabled) +{ + int i, cpu; + + preempt_disable(); + + cpu = smp_processor_id(); + + /* See comment in smp_flush_tlb_range() */ + for (i = 0; i < nr_cpu_ids; i++) { + if (i == cpu) + continue; + context->cpumsk[i] = 0; + } + + mmu_pid_flush_tlb_range_and_pgtables(context, start, end, is_active, + mm_cpumask, trace_enabled); + + /* See comment about memory barriers in do_switch_mm(). */ + smp_mb(); + + /* Check that mm_cpumask() has some other CPU set */ + if (cpumask_any_but(mm_cpumask, cpu) < nr_cpu_ids) { + /* Send flush ipi to all other cpus in mm_cpumask(). */ + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + smp_call_function_many(mm_cpumask, flush_ipi_func, + flush_ipi_data, 1); + } + + preempt_enable(); +} + +#endif /* CONFIG_SMP */ + +#endif /* _E2K_TLB_CONTEXT_H_ */ diff --git a/arch/e2k/include/asm/tlb_regs_access.h b/arch/e2k/include/asm/tlb_regs_access.h index 1d20f93..541bb9f 100644 --- a/arch/e2k/include/asm/tlb_regs_access.h +++ b/arch/e2k/include/asm/tlb_regs_access.h @@ -140,13 +140,14 @@ get_va_tlb_state(tlb_line_state_t *tlb, e2k_addr_t addr, bool large_page) tlb->huge = large_page; for (set_no = 0; set_no < NATIVE_TLB_SETS_NUM; set_no++) { - set_state = &tlb->sets[set_no]; tlb_tag_t tlb_tag; pte_t tlb_entry; + + set_state = &tlb->sets[set_no]; tlb_tag = get_va_tlb_set_tag(addr, set_no, large_page); tlb_entry = get_va_tlb_set_entry(addr, set_no, large_page); set_state->tlb_tag = tlb_tag; - set_state->tlb_entry; + set_state->tlb_entry = tlb_entry; } } diff --git a/arch/e2k/include/asm/tlbflush.h b/arch/e2k/include/asm/tlbflush.h index fabcd44..23f596a 100644 --- a/arch/e2k/include/asm/tlbflush.h +++ b/arch/e2k/include/asm/tlbflush.h @@ -36,6 +36,17 @@ extern void __flush_tlb_range_and_pgtables(struct mm_struct *mm, extern void __flush_tlb_address(e2k_addr_t addr); extern void __flush_tlb_address_pgtables(e2k_addr_t addr); +extern void native_smp_flush_tlb_all(void); +extern void native_smp_flush_tlb_mm(struct mm_struct *mm); +extern void native_smp_flush_tlb_page(struct vm_area_struct *vma, + e2k_addr_t addr); +extern void native_smp_flush_tlb_range(struct mm_struct *mm, + e2k_addr_t start, e2k_addr_t end); +extern void native_smp_flush_pmd_tlb_range(struct mm_struct *mm, + e2k_addr_t start, e2k_addr_t end); +extern void native_smp_flush_tlb_range_and_pgtables(struct mm_struct *mm, + e2k_addr_t start, e2k_addr_t end); + #ifdef CONFIG_COPY_USER_PGD_TO_KERNEL_ROOT_PT extern void __flush_cpu_root_pt_mm(struct mm_struct *mm); extern void __flush_cpu_root_pt(void); @@ -86,17 +97,6 @@ extern void __flush_cpu_root_pt(void); #include -extern void native_smp_flush_tlb_all(void); -extern void native_smp_flush_tlb_mm(struct mm_struct *mm); -extern void native_smp_flush_tlb_page(struct vm_area_struct *vma, - e2k_addr_t addr); -extern void native_smp_flush_tlb_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); -extern void 
native_smp_flush_pmd_tlb_range(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); -extern void native_smp_flush_tlb_range_and_pgtables(struct mm_struct *mm, - e2k_addr_t start, e2k_addr_t end); - #define flush_tlb_all native_smp_flush_tlb_all #define flush_tlb_mm native_smp_flush_tlb_mm #define flush_tlb_page(vma, addr) native_smp_flush_tlb_page(vma, addr) @@ -126,6 +126,8 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, { } +#ifdef CONFIG_KVM_HOST_MODE #include +#endif /* CONFIG_KVM_HOST_MODE */ #endif /* _E2K_TLBFLUSH_H */ diff --git a/arch/e2k/include/asm/trace-tlb-flush.h b/arch/e2k/include/asm/trace-tlb-flush.h new file mode 100644 index 0000000..f626cc2 --- /dev/null +++ b/arch/e2k/include/asm/trace-tlb-flush.h @@ -0,0 +1,188 @@ +#if !defined(_ASM_E2K_TRACE_TLB_FLUSH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _ASM_E2K_TRACE_TLB_FLUSH_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tlb + +TRACE_EVENT( + mmu_pid_flush_tlb_all, + + TP_PROTO(int cpu_id), + + TP_ARGS(cpu_id), + + TP_STRUCT__entry( + __field(int, cpu_id) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + ), + + TP_printk("cpu #%d flush TLB all", __entry->cpu_id) +); + +TRACE_EVENT( + mmu_pid_flush_tlb_mm, + + TP_PROTO(int cpu_id, mm_context_t *context, bool is_active, + unsigned long old_pid, unsigned long new_pid + ), + + TP_ARGS(cpu_id, context, is_active, old_pid, new_pid), + + TP_STRUCT__entry( + __field(int, cpu_id) + __field(mm_context_t *, context) + __field(bool, is_active) + __field(unsigned long, old_pid) + __field(unsigned long, new_pid) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->context = context; + __entry->is_active = is_active; + __entry->old_pid = old_pid; + __entry->new_pid = new_pid; + ), + + TP_printk("cpu #%d mm flushed pid 0x%lx %s 0x%lx", + __entry->cpu_id, + __entry->old_pid, (__entry->is_active) ? "updated to" : "zeroed", + __entry->new_pid + ) +); + +TRACE_EVENT( + mmu_pid_flush_tlb_address, + + TP_PROTO(int cpu_id, mm_context_t *context, e2k_addr_t addr, + unsigned long old_pid, unsigned long new_pid + ), + + TP_ARGS(cpu_id, context, addr, old_pid, new_pid), + + TP_STRUCT__entry( + __field(int, cpu_id) + __field(mm_context_t *, context) + __field(e2k_addr_t, addr) + __field(unsigned long, old_pid) + __field(unsigned long, new_pid) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->context = context; + __entry->addr = addr; + __entry->old_pid = old_pid; + __entry->new_pid = new_pid; + ), + + TP_printk("cpu #%d flushed addr %px and %s 0x%lx", + __entry->cpu_id, (void *)__entry->addr, + (__entry->old_pid == 0) ? "created new pid" : "pid former", + __entry->new_pid + ) +); + +TRACE_EVENT( + mmu_pid_flush_tlb_range, + + TP_PROTO(int cpu_id, mm_context_t *context, + e2k_addr_t start, e2k_addr_t end, + unsigned long old_pid, unsigned long new_pid + ), + + TP_ARGS(cpu_id, context, start, end, old_pid, new_pid), + + TP_STRUCT__entry( + __field(int, cpu_id) + __field(mm_context_t *, context) + __field(e2k_addr_t, start) + __field(e2k_addr_t, end) + __field(unsigned long, old_pid) + __field(unsigned long, new_pid) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->context = context; + __entry->start = start; + __entry->end = end; + __entry->old_pid = old_pid; + __entry->new_pid = new_pid; + ), + + TP_printk("cpu #%d flushed %s from %px to %px and %s 0x%lx", + __entry->cpu_id, + (__entry->old_pid != __entry->new_pid) ? 
+ "all mm instead of range " : "only range", + (void *)__entry->start, (void *)__entry->end, + (__entry->old_pid != __entry->new_pid) ? + "created new pid" : "pid former", + __entry->new_pid + ) +); + +TRACE_EVENT( + va_tlb_state, + + TP_PROTO(e2k_addr_t address), + + TP_ARGS(address), + + TP_STRUCT__entry( + __field( e2k_addr_t, address ) + __field_struct( tlb_line_state_t, line ) + __field_struct( tlb_line_state_t, huge_line ) + __field( u64, dtlb_entry ) + __field( unsigned long, mmu_pptb ) + __field( unsigned long, mmu_pid ) + ), + + TP_fast_assign( + __entry->address = address; + get_va_tlb_state(&__entry->line, address, false); + get_va_tlb_state(&__entry->huge_line, address, true); + __entry->dtlb_entry = get_MMU_DTLB_ENTRY(address); + __entry->mmu_pptb = NATIVE_READ_MMU_U_PPTB_REG(); + __entry->mmu_pid = NATIVE_READ_MMU_PID_REG(); + ), + + TP_printk(" 0x%016lx : dtlb 0x%016llx U_PPTB 0x%lx PID 0x%lx\n" + " TLB set #0 tag 0x%016lx entry 0x%016lx\n" + " TLB set #1 tag 0x%016lx entry 0x%016lx\n" + " TLB set #2 tag 0x%016lx entry 0x%016lx\n" + " TLB set #3 tag 0x%016lx entry 0x%016lx\n" + " huge TLB set #2 tag 0x%016lx entry 0x%016lx\n" + " huge TLB set #3 tag 0x%016lx entry 0x%016lx", + __entry->address, __entry->dtlb_entry, + __entry->mmu_pptb, __entry->mmu_pid, + __entry->line.sets[0].tlb_tag, + pte_val(__entry->line.sets[0].tlb_entry), + __entry->line.sets[1].tlb_tag, + pte_val(__entry->line.sets[1].tlb_entry), + __entry->line.sets[2].tlb_tag, + pte_val(__entry->line.sets[2].tlb_entry), + __entry->line.sets[3].tlb_tag, + pte_val(__entry->line.sets[3].tlb_entry), + __entry->huge_line.sets[2].tlb_tag, + pte_val(__entry->huge_line.sets[2].tlb_entry), + __entry->huge_line.sets[3].tlb_tag, + pte_val(__entry->huge_line.sets[3].tlb_entry) + ) +); + +#endif /* _ASM_E2K_TRACE_TLB_FLUSH_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../arch/e2k/include/asm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace-tlb-flush + +/* This part must be outside protection */ +#include diff --git a/arch/e2k/include/asm/trap_table.h b/arch/e2k/include/asm/trap_table.h index ec60a42..91ee634 100644 --- a/arch/e2k/include/asm/trap_table.h +++ b/arch/e2k/include/asm/trap_table.h @@ -162,6 +162,18 @@ extern const protected_system_call_func sys_call_table_entry8[NR_syscalls]; extern const system_call_func sys_protcall_table[NR_syscalls]; extern const system_call_func sys_call_table_deprecated[NR_syscalls]; +#ifndef CONFIG_CPU_HAS_FILL_INSTRUCTION +#define native_restore_some_values_after_fill(__regs, __from, __return_to_user) \ +do { \ + __regs = current_thread_info()->pt_regs; \ + if (!__builtin_constant_p(from)) \ + __from = current->thread.fill.from; \ + __return_to_user = current->thread.fill.return_to_user; \ +} while (false) +#else /* CONFIG_CPU_HAS_FILL_INSTRUCTION */ +#define native_restore_some_values_after_fill(__regs, __from, __return_to_user) +#endif /* !CONFIG_CPU_HAS_FILL_INSTRUCTION */ + #if !defined(CONFIG_PARAVIRT_GUEST) && !defined(CONFIG_KVM_GUEST_KERNEL) /* it is native kernel without any virtualization */ /* or it is host kernel with virtualization support */ @@ -205,6 +217,10 @@ kvm_mmio_page_fault(struct pt_regs *regs, trap_cellar_t *tcellar) return 0; } +#define restore_some_values_after_fill(__regs, __from, __return_to_user) \ + native_restore_some_values_after_fill(__regs, __from, \ + __return_to_user) + #ifndef CONFIG_VIRTUALIZATION /* it is native kernel without any virtualization */ diff --git a/arch/e2k/include/asm/traps.h b/arch/e2k/include/asm/traps.h 
index fa06f70..770e6c5 100644 --- a/arch/e2k/include/asm/traps.h +++ b/arch/e2k/include/asm/traps.h @@ -61,15 +61,16 @@ extern int constrict_user_data_stack(struct pt_regs *regs, unsigned long incr); extern int expand_user_data_stack(struct pt_regs *regs, unsigned long incr); extern void do_notify_resume(struct pt_regs *regs); -extern int parse_getsp_operation(struct trap_pt_regs *regs, int *incr); - extern void coredump_in_future(void); -enum { - GETSP_OP_IGNORE, +enum getsp_action { + GETSP_OP_FAIL = 1, + GETSP_OP_SIGSEGV, GETSP_OP_INCREMENT, GETSP_OP_DECREMENT }; +extern enum getsp_action parse_getsp_operation(const struct pt_regs *regs, + int *incr, void __user **fault_addr); static inline unsigned int user_trap_init(void) { @@ -124,8 +125,10 @@ static inline void kernel_trap_mask_init(void) { WRITE_OSEM_REG(user_trap_init()); #ifdef CONFIG_KVM_HOST_MODE - machine.rwd(E2K_REG_HCEM, user_hcall_init()); - machine.rwd(E2K_REG_HCEB, (unsigned long) __hypercalls_begin); + if (!paravirt_enabled()) { + machine.rwd(E2K_REG_HCEM, user_hcall_init()); + machine.rwd(E2K_REG_HCEB, (unsigned long) __hypercalls_begin); + } #endif } diff --git a/arch/e2k/include/uapi/asm/e2k_api.h b/arch/e2k/include/uapi/asm/e2k_api.h index 8348659..7da1619 100644 --- a/arch/e2k/include/uapi/asm/e2k_api.h +++ b/arch/e2k/include/uapi/asm/e2k_api.h @@ -104,7 +104,7 @@ typedef void *__e2k_ptr_t; #ifndef __ASSEMBLY__ -typedef unsigned long long __e2k_syscall_arg_t; +typedef unsigned long __e2k_syscall_arg_t; #define E2K_SYSCALL_CLOBBERS \ "ctpr1", "ctpr2", "ctpr3", \ diff --git a/arch/e2k/include/uapi/asm/kvm.h b/arch/e2k/include/uapi/asm/kvm.h index 2147e9a..dfa94cc 100644 --- a/arch/e2k/include/uapi/asm/kvm.h +++ b/arch/e2k/include/uapi/asm/kvm.h @@ -4,6 +4,7 @@ /* * KVM e2k specific structures and definitions * + * Note: you must update KVM_ARCH_API_VERSION if you change this interface. */ #ifndef __ASSEMBLY__ @@ -11,6 +12,13 @@ #include #include +/* + * The e2k KVM API is not yet stable, so there is an e2k-specific + * refinement of the interface version in yymmdd format, so that the + * version number always increases monotonically. + */ +#define KVM_ARCH_API_VERSION 210512 + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* Select e2k specific features in */ @@ -68,6 +76,16 @@ /* Architectural interrupt line count. 
*/ #define KVM_NR_INTERRUPTS 256 +/* + * e2k arch-dependent limits for the nr of threads virtual CPUs + */ + +/* KVM manage */ +#define KVM_VM_MAX_LIMIT 1024 /* max number of VM IDs at use */ + +/* VCPUs manage */ +#define KVM_MAX_VCPUS 64 + typedef struct kvm_memory_alias { __u32 slot; /* this has a different namespace than memory slots */ __u32 flags; @@ -353,6 +371,7 @@ typedef struct kvm_guest_area_reserve { #define KVM_GUEST_NBSR_BASE_NODE_1 0x00040000 #define KVM_GUEST_NBSR_BASE_NODE_2 0x00080000 #define KVM_GUEST_NBSR_BASE_NODE_3 0x00100000 +#define KVM_HOST_INFO_VRAM_SIZE 0x00200000 /* flags of IO ports area mapping for guest */ #define KVM_IO_PORTS_MMAP 0x1ff00000000 /* > max physical memory */ @@ -450,8 +469,10 @@ typedef struct kvm_guest_nbsr_state { #ifndef __ASSEMBLY__ +#define KVM_GET_ARCH_API_VERSION _IO(KVMIO, 0xe1) +#define KVM_VCPU_THREAD_SETUP _IO(KVMIO, 0xe0) #define KVM_GET_GUEST_ADDRESS _IOWR(KVMIO, 0xe2, unsigned long *) -#define KVM_SETUP_VCPU _IO(KVMIO, 0xe3) +#define KVM_RESET_E2K_VCPU _IO(KVMIO, 0xe3) #define KVM_ALLOC_GUEST_AREA _IOWR(KVMIO, 0xe4, \ kvm_guest_area_alloc_t) #define KVM_VCPU_GUEST_STARTUP _IOW(KVMIO, 0xe5, \ @@ -480,8 +501,11 @@ typedef struct kvm_guest_nbsr_state { /* e2k-specific exit reasons from KVM to userspace assistance */ #define KVM_EXIT_E2K_NOTIFY_IO 33 +#define KVM_EXIT_E2K_SHUTDOWN 36 #define KVM_EXIT_E2K_RESTART 37 #define KVM_EXIT_E2K_PANIC 38 +#define KVM_EXIT_E2K_INTR 39 +#define KVM_EXIT_E2K_UNKNOWN 44 #endif /* __ASSEMBLY__ */ diff --git a/arch/e2k/include/uapi/asm/siginfo.h b/arch/e2k/include/uapi/asm/siginfo.h index f73f205..ddc8b3f 100644 --- a/arch/e2k/include/uapi/asm/siginfo.h +++ b/arch/e2k/include/uapi/asm/siginfo.h @@ -6,7 +6,7 @@ #define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) #define __ARCH_SI_TRAPNO -#define __ARCH_SI_BAND_T long +#define __ARCH_SI_BAND_T int #include diff --git a/include/linux/clk.h b/include/linux/clk.h index 18b7b95..f0b915a 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -798,7 +798,11 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev, static inline struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { +#ifdef CONFIG_MCST + return ERR_PTR(-ENOENT); +#else return NULL; +#endif } static inline void clk_put(struct clk *clk) {} diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 7743363..21281bc 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,15 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +#ifdef CONFIG_MCST +/* Some functions cannot be traced only on the host mode */ +#ifdef CONFIG_KVM_HOST_KERNEL +#define notrace_on_host notrace +#else /* !CONFIG_KVM_HOST_KERNEL */ +#define notrace_on_host +#endif /* CONFIG_KVM_HOST_KERNEL */ +#endif /* CONFIG_MCST */ + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d19fd4e..d7601f9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -578,9 +578,6 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) if (tmp == vcpu) return idx; BUG(); -#ifdef CONFIG_MCST - return 0; -#endif } #define kvm_for_each_memslot(memslot, slots) \ diff --git a/include/uapi/drm/mga2_drm.h b/include/uapi/drm/mga2_drm.h index c71e2ce..dc40065 100644 --- a/include/uapi/drm/mga2_drm.h +++ 
b/include/uapi/drm/mga2_drm.h @@ -15,7 +15,7 @@ #define DRM_IOCTL_MGA2_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA2_GEM_CREATE, struct drm_mga2_gem_create) #define DRM_IOCTL_MGA2_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA2_GEM_MMAP, struct drm_mga2_gem_mmap) #define DRM_IOCTL_MGA2_SYNC DRM_IO(DRM_COMMAND_BASE + DRM_MGA2_SYNC) -#define DRM_IOCTL_MGA2_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_MGA2_INFO, struct drm_mga2_info) +#define DRM_IOCTL_MGA2_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA2_INFO, struct drm_mga2_info) #define DRM_IOCTL_MGA2_AUC2 DRM_IOW(DRM_COMMAND_BASE + DRM_MGA2_AUC2, struct drm_mga2_bctrl) #define DRM_BCTRL_PAGES_NR 1 diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8649422..849daa0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -366,6 +366,11 @@ struct bpf_stack_build_id { }; }; +#ifdef __ptr128__ +#undef __aligned_u64 +#define __aligned_u64 char * +#endif + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 7a4074f..7fea0fd 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -132,10 +132,6 @@ enum net_device_flags { #define IFF_ECHO IFF_ECHO #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ -/* #ifdef CONFIG_MCST */ -#define IFF_SPINWAIT 0x80000 /* dev_rx_action thread pools, not schedule */ -/* #endif */ - #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\ IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
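The new KVM_GET_ARCH_API_VERSION ioctl added above exposes the yymmdd-stamped KVM_ARCH_API_VERSION, so userspace can verify that it was built against matching e2k uapi headers. Below is a minimal sketch of such a check, not part of the patch: it assumes the uapi headers from this tree are in the include path (so that linux/kvm.h pulls in the e2k asm/kvm.h) and that the ioctl is accepted on the /dev/kvm system fd, analogous to the generic KVM_GET_API_VERSION; if it is instead handled on a VM fd, the same call would be issued on the fd returned by KVM_CREATE_VM.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>	/* on e2k this pulls in arch/e2k/include/uapi/asm/kvm.h */

int main(void)
{
	int kvm_fd, generic, arch;

	kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm_fd < 0) {
		perror("open /dev/kvm");
		return EXIT_FAILURE;
	}

	/* Generic KVM ABI version (12 on current kernels) */
	generic = ioctl(kvm_fd, KVM_GET_API_VERSION, 0);

	/* e2k-specific date stamp in yymmdd format, e.g. 210512 */
	arch = ioctl(kvm_fd, KVM_GET_ARCH_API_VERSION, 0);

	printf("KVM API %d, e2k arch API %d (built against %d)\n",
	       generic, arch, KVM_ARCH_API_VERSION);

	/*
	 * Because the stamp is a date, a plain numeric comparison tells
	 * whether the running kernel is at least as new as the headers
	 * this program was compiled with.
	 */
	if (arch < KVM_ARCH_API_VERSION)
		fprintf(stderr, "e2k KVM ABI is older than expected\n");

	close(kvm_fd);
	return EXIT_SUCCESS;
}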