linux-headers-5.4.0-2.11

This commit is contained in:
Alibek Omarov 2021-07-14 01:51:52 +03:00
parent f99e7af53c
commit f830966167
50 changed files with 3410 additions and 1963 deletions

View File

@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 91
EXTRAVERSION = -2.9
EXTRAVERSION = -2.11
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*

View File

@ -77,6 +77,15 @@ extern void epic_send_IPI_mask_allbutself(const struct cpumask *mask,
extern void epic_wait_icr_idle(void);
extern void clear_cepic(void);
extern bool pcsm_adjust_enable;
/*
 * Set of callbacks handed to register_pcs_handle().
 * NOTE(review): when and from which context pcs_interrupt is invoked is not
 * visible here - confirm against the register_pcs_handle() implementation.
 */
struct pcs_handle {
/* handler taking no arguments and returning nothing */
void (*pcs_interrupt)(void);
};
extern void register_pcs_handle(const struct pcs_handle *handle);
extern void unregister_pcs_handle(void);
extern __visible void epic_smp_timer_interrupt(struct pt_regs *regs);
extern __visible void epic_smp_spurious_interrupt(struct pt_regs *regs);
extern __visible void epic_smp_error_interrupt(struct pt_regs *regs);

View File

@ -0,0 +1,852 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* include/asm-e2k/copy-hw-stacks.h
*
* Copyright 2021 mcst.ru
*/
#ifndef _E2K_COPY_HW_STACKS_H
#define _E2K_COPY_HW_STACKS_H
#include <linux/types.h>
#include <asm/mman.h>
#include <asm/pv_info.h>
#include <asm/process.h>
#include <asm/kvm/trace-hw-stacks.h>
#undef DEBUG_PV_UST_MODE
#undef DebugUST
#define DEBUG_PV_UST_MODE 0 /* guest user stacks debug */
/*
 * Debug print helper: emits a pr_info() message prefixed with the name of
 * the calling function, but only when debug_guest_ust evaluates to true.
 * Written as a statement expression.
 */
#define DebugUST(fmt, args...) \
({ \
if (debug_guest_ust) \
pr_info("%s(): " fmt, __func__, ##args); \
})
#undef DEBUG_PV_SYSCALL_MODE
#define DEBUG_PV_SYSCALL_MODE 0 /* syscall injection debugging */
#if DEBUG_PV_UST_MODE || DEBUG_PV_SYSCALL_MODE
extern bool debug_guest_ust;
#else
#define debug_guest_ust false
#endif /* DEBUG_PV_UST_MODE || DEBUG_PV_SYSCALL_MODE */
#ifndef CONFIG_VIRTUALIZATION
/* It is a native kernel without virtualization support */
#else /* CONFIG_VIRTUALIZATION */
/* It is native host kernel with virtualization support */
/* or paravirtualized host and guest */
/* or native guest kernel
#include <asm/kvm/process.h>
*/
#endif /* ! CONFIG_VIRTUALIZATION */
/*
 * Per-frame callbacks used by trace_proc_stack_frames() and
 * trace_chain_stack_frames(): the first argument receives the address of
 * the frame in the destination area, the second one a local copy of the
 * frame contents read from the source area.
 */
typedef void (*trace_ps_frame_func_t)(kernel_mem_ps_t *base, kernel_mem_ps_t *frame);
typedef void (*trace_pcs_frame_func_t)(e2k_mem_crs_t *base, e2k_mem_crs_t *frame);
/*
 * Hand every frame of a procedure stack area to @trace_func, walking from
 * the last frame of the area down to the first one.
 *
 * @dst_ps_base: base of the destination area; only frame addresses derived
 *		 from it are passed to the callback, it is not dereferenced
 * @src_ps_base: base of the area the frame contents are read from
 * @ps_size:	 size of the area in bytes (EXT_4_NR_SZ bytes per frame)
 * @trace_func:	 callback invoked once per frame
 *
 * The callback gets a local copy of the source frame. For instruction set
 * versions starting from E2K_ISET_V5 the word_hi and ext_lo fields are
 * exchanged in that copy.
 */
static inline void trace_proc_stack_frames(kernel_mem_ps_t *dst_ps_base,
		kernel_mem_ps_t *src_ps_base, u64 ps_size,
		trace_ps_frame_func_t trace_func)
{
	int idx = ps_size / EXT_4_NR_SZ;

	while (--idx >= 0) {
		kernel_mem_ps_t *from = &src_ps_base[idx];
		kernel_mem_ps_t frame;

		frame.word_lo = from->word_lo;
		if (machine.native_iset_ver >= E2K_ISET_V5) {
			/* middle words are stored in swapped order */
			frame.word_hi = from->ext_lo;
			frame.ext_lo = from->word_hi;
		} else {
			frame.word_hi = from->word_hi;
			frame.ext_lo = from->ext_lo;
		}
		frame.ext_hi = from->ext_hi;

		trace_func(&dst_ps_base[idx], &frame);
	}
}
/*
 * Hand every frame of a chain stack area to @trace_func, walking from the
 * last frame of the area down to the first one. The whole walk is done
 * between raw_all_irq_save() and raw_all_irq_restore().
 *
 * @dst_pcs_base: base of the destination area; only frame addresses derived
 *		  from it are passed to the callback
 * @src_pcs_base: base of the area the frame contents are read from
 * @pcs_size:	  size of the area in bytes
 * @trace_func:	  callback invoked once per frame with a local frame copy
 */
static inline void trace_chain_stack_frames(e2k_mem_crs_t *dst_pcs_base,
		e2k_mem_crs_t *src_pcs_base, u64 pcs_size,
		trace_pcs_frame_func_t trace_func)
{
	unsigned long irq_state;
	int idx = pcs_size / sizeof(e2k_mem_crs_t);

	raw_all_irq_save(irq_state);
	while (--idx >= 0) {
		e2k_mem_crs_t frame = src_pcs_base[idx];

		trace_func(&dst_pcs_base[idx], &frame);
	}
	raw_all_irq_restore(irq_state);
}
/*
 * Trace the contents of a host virtual address area in lines of four
 * 64-bit words. The whole walk is done between raw_all_irq_save() and
 * raw_all_irq_restore().
 *
 * @hva_base: start of the area
 * @hva_size: size of the area in bytes
 *
 * Full lines are reported from the last one down to the first one. If
 * @hva_size is not a multiple of the line size, the remaining partial line
 * is reported last.
 */
static inline void trace_host_hva_area(u64 *hva_base, u64 hva_size)
{
	int line_no, line_num;
	u64 *dst_hva_line;
	unsigned long flags;

	line_num = hva_size / (sizeof(u64) * 4);

	raw_all_irq_save(flags);
	for (line_no = line_num - 1; line_no >= 0; line_no--) {
		dst_hva_line = &hva_base[line_no * 4];
		trace_host_hva_area_line(dst_hva_line, (sizeof(u64) * 4));
	}
	if (line_num * (sizeof(u64) * 4) < hva_size) {
		/*
		 * The partial tail starts right after the last full line.
		 * The loop above terminates with line_no == -1, so indexing
		 * with line_no here would point before the start of the area.
		 */
		dst_hva_line = &hva_base[line_num * 4];
		trace_host_hva_area_line(dst_hva_line,
				hva_size - line_num * (sizeof(u64) * 4));
	}
	raw_all_irq_restore(flags);
}
/*
 * Copy @size bytes of hardware stack frames from @src to @dst using the
 * tagged memmove primitives.
 *
 * With CPU_FEAT_QPREG the data is moved in 64-byte chunks. Otherwise it is
 * moved in 128-byte chunks with E2K_TAGGED_MEMMOVE_128_RF_V2, followed by
 * one 64-byte chunk when (size & 64) is set. In both cases a final 32-byte
 * chunk is moved when (size & 32) is set. A remainder smaller than 32
 * bytes is not copied by this function.
 */
static __always_inline void
native_kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
void *dst_tail;
const void *src_tail;
u64 copied;
int i;
/*
 * Kernel does not use FP registers so do not copy them.
 * This only applies to CPUs before V5 instruction set
 * (since V5 FP registers become general-purpose QP registers).
 */
if (cpu_has(CPU_FEAT_QPREG)) {
#pragma loop count (10)
for (i = 0; i < size / 64; i++)
E2K_TAGGED_MEMMOVE_64(&dst[8 * i], &src[8 * i]);
/* the tail starts after the last whole 64-byte chunk */
copied = round_down(size, 64);
dst_tail = (void *) dst + copied;
src_tail = (void *) src + copied;
} else {
#pragma loop count (5)
for (i = 0; i < size / 128; i++)
E2K_TAGGED_MEMMOVE_128_RF_V2(&dst[16 * i],
&src[16 * i]);
/* the tail starts after the last whole 128-byte chunk */
copied = round_down(size, 128);
dst_tail = (void *) dst + copied;
src_tail = (void *) src + copied;
if (size & 64) {
E2K_TAGGED_MEMMOVE_64(dst_tail, src_tail);
dst_tail += 64;
src_tail += 64;
}
}
if (size & 32)
E2K_TAGGED_MEMMOVE_32(dst_tail, src_tail);
}
/*
 * Move the contents of the current kernel chain stack from @src to @dst
 * and shrink the stack index by @spilled_size bytes.
 *
 * @dst: destination address of the move
 * @src: source address of the move
 * @spilled_size: number of bytes PCSP_hi.ind is decreased by
 *
 * The number of bytes moved is (PCSP_hi.ind - @spilled_size), as read after
 * NATIVE_FLUSHC. It is a BUG if that value is negative or not aligned to
 * ALIGN_PCSTACK_TOP_SIZE.
 */
static __always_inline void
native_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
e2k_pcsp_hi_t k_pcsp_hi;
u64 size;
int i;
DebugUST("current host chain stack index 0x%x, PCSHTP 0x%llx\n",
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
NATIVE_READ_PCSHTP_REG_SVALUE());
NATIVE_FLUSHC;
/* PCSP_hi is read only after the flush above */
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
size = k_pcsp_hi.PCSP_hi_ind - spilled_size;
BUG_ON(!IS_ALIGNED(size, ALIGN_PCSTACK_TOP_SIZE) || (s64) size < 0);
/* move 32 bytes per iteration: four loads followed by four stores */
#pragma loop count (2)
for (i = 0; i < size / 32; i++) {
u64 v0, v1, v2, v3;
v0 = src[4 * i];
v1 = src[4 * i + 1];
v2 = src[4 * i + 2];
v3 = src[4 * i + 3];
dst[4 * i] = v0;
dst[4 * i + 1] = v1;
dst[4 * i + 2] = v2;
dst[4 * i + 3] = v3;
}
/* publish the reduced stack index */
k_pcsp_hi.PCSP_hi_ind -= spilled_size;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
DebugUST("move spilled chain part from host top %px to "
"bottom %px, size 0x%llx\n",
src, dst, size);
DebugUST("host kernel chain stack index is now 0x%x, "
"guest user PCSHTP 0x%llx\n",
k_pcsp_hi.PCSP_hi_ind, spilled_size);
}
/*
 * Move the contents of the current kernel procedure stack from @src to
 * @dst and shrink the stack index by @spilled_size bytes.
 *
 * @dst: destination address of the move
 * @src: source address of the move
 * @spilled_size: number of bytes PSP_hi.ind is decreased by
 *
 * The number of bytes moved is (PSP_hi.ind - @spilled_size), as read after
 * NATIVE_FLUSHR. It is a BUG if that value is negative or not aligned to
 * ALIGN_PSTACK_TOP_SIZE. The move itself is done by
 * native_kernel_hw_stack_frames_copy().
 */
static __always_inline void
native_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
e2k_psp_hi_t k_psp_hi;
u64 size;
DebugUST("current host procedure stack index 0x%x, PSHTP 0x%x\n",
NATIVE_NV_READ_PSP_HI_REG().PSP_hi_ind,
NATIVE_NV_READ_PSHTP_REG().PSHTP_ind);
NATIVE_FLUSHR;
/* PSP_hi is read only after the flush above */
k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
size = k_psp_hi.PSP_hi_ind - spilled_size;
BUG_ON(!IS_ALIGNED(size, ALIGN_PSTACK_TOP_SIZE) || (s64) size < 0);
prefetchw_range(src, size);
native_kernel_hw_stack_frames_copy(dst, src, size);
/* publish the reduced stack index */
k_psp_hi.PSP_hi_ind -= spilled_size;
NATIVE_NV_NOIRQ_WRITE_PSP_HI_REG(k_psp_hi);
DebugUST("move spilled procedure part from host top %px to "
"bottom %px, size 0x%llx\n",
src, dst, size);
DebugUST("host kernel procedure stack index is now 0x%x, "
"guest user PSHTP 0x%llx\n",
k_psp_hi.PSP_hi_ind, spilled_size);
}
#if defined(CONFIG_PARAVIRT_GUEST)
/* paravirtualized kernel (host and guest) */
#include <asm/paravirt/copy-hw-stacks.h>
#elif defined(CONFIG_KVM_GUEST_KERNEL)
/* It is native guest kernel (without paravirtualization) */
#include <asm/kvm/guest/copy-hw-stacks.h>
#elif defined(CONFIG_VIRTUALIZATION) || !defined(CONFIG_VIRTUALIZATION)
/* native kernel with virtualization support */
/* native kernel without virtualization support */
/* Forwards to native_kernel_hw_stack_frames_copy() with the same arguments */
static __always_inline void
kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
native_kernel_hw_stack_frames_copy(dst, src, size);
}
/* Forwards to native_collapse_kernel_pcs() with the same arguments */
static __always_inline void
collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
native_collapse_kernel_pcs(dst, src, spilled_size);
}
/* Forwards to native_collapse_kernel_ps() with the same arguments */
static __always_inline void
collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
native_collapse_kernel_ps(dst, src, spilled_size);
}
#else /* ??? */
#error "Undefined virtualization mode"
#endif /* CONFIG_PARAVIRT_GUEST */
/*
 * Return the 'size' field of the WD register shifted right by 4.
 * @from is not used by this implementation.
 * NOTE(review): the shift presumably converts bytes to quad registers -
 * confirm against the WD register layout.
 */
static __always_inline u64 get_wsz(enum restore_caller from)
{
return NATIVE_READ_WD_REG().size >> 4;
}
/*
 * Return E2K_MAXSR minus the sum of @cur_window_q and the quad index taken
 * from @pshtp, where the quad index is first limited to
 * (E2K_MAXSR - @cur_window_q).
 *
 * @cur_window_q: size of the current window
 * @pshtp:	  PSHTP value the quad index is extracted from
 *
 * NOTE(review): the units are presumably quad registers, as the _q suffix
 * suggests - confirm.
 */
static __always_inline u64 get_ps_clear_size(u64 cur_window_q,
		e2k_pshtp_t pshtp)
{
	const u64 room_q = E2K_MAXSR - cur_window_q;
	s64 spilled_q = GET_PSHTP_Q_INDEX(pshtp);

	if (spilled_q > room_q)
		spilled_q = room_q;

	return E2K_MAXSR - (cur_window_q + spilled_q);
}
/*
 * Return @u_pshtp_size reduced by (E2K_MAXSR - @cur_window_q) * EXT_4_NR_SZ.
 * Callers in this file treat a result <= 0 as "nothing to copy".
 *
 * @cur_window_q: size of the current window
 * @u_pshtp_size: size in bytes of the spilled procedure stack part
 */
static __always_inline s64 get_ps_copy_size(u64 cur_window_q, s64 u_pshtp_size)
{
	const u64 room_bytes = (E2K_MAXSR - cur_window_q) * EXT_4_NR_SZ;

	return u_pshtp_size - room_bytes;
}
/*
 * E2K_CF_MAX_FILL is a compile-time constant when
 * CONFIG_CPU_HAS_FILL_INSTRUCTION is set, otherwise it is read from the
 * external variable cf_max_fill_return.
 */
#ifdef CONFIG_CPU_HAS_FILL_INSTRUCTION
# define E2K_CF_MAX_FILL (E2K_CF_MAX_FILL_FILLC_q * 0x10)
#else
extern int cf_max_fill_return;
# define E2K_CF_MAX_FILL cf_max_fill_return
#endif
/*
 * Return @u_pcshtp_size reduced by E2K_CF_MAX_FILL.
 * Callers in this file treat a result <= 0 as "nothing to copy".
 */
static __always_inline s64 get_pcs_copy_size(s64 u_pcshtp_size)
{
/* Before v6 it was possible to fill no more than 16 registers.
 * Since E2K_MAXCR_q is much bigger than 16 we can be sure that
 * there is enough space in CF for the FILL, so there is no
 * need to take into account space taken by current window. */
return u_pcshtp_size - E2K_CF_MAX_FILL;
}
/*
 * Copy hardware stack from user to *current* kernel stack.
 * One has to be careful to avoid hardware FILL of this stack.
 *
 * @dst:   kernel destination address
 * @src:   user source address
 * @size:  number of bytes to copy
 * @regs:  pt_regs passed on to the copy primitive and used for the
 *	   host_test_intc_emul_mode() check
 * @chain: true to flush with E2K_FLUSHC, false to flush with E2K_FLUSHR
 *	   before each copy attempt
 *
 * Returns 0 on success, -EFAULT if the range check fails or if
 * RESTORE_USR_PFAULT reports a fault. The range check is skipped when
 * host_test_intc_emul_mode(regs) is true.
 *
 * This function does not change interrupt state itself; see
 * copy_user_to_current_hw_stack() for the variant wrapped in
 * raw_all_irq_save()/raw_all_irq_restore().
 */
static inline int __copy_user_to_current_hw_stack(void *dst, void __user *src,
unsigned long size, const pt_regs_t *regs, bool chain)
{
unsigned long min_flt, maj_flt, ts_flag;
if (likely(!host_test_intc_emul_mode(regs))) {
if (!__range_ok((unsigned long __force) src, size,
PAGE_OFFSET))
return -EFAULT;
}
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
/*
 * Every page fault here has a chance of FILL'ing the frame
 * that is being copied, in which case we repeat the copy.
 */
do {
/* snapshot the fault counters to detect faults during the copy */
min_flt = READ_ONCE(current->min_flt);
maj_flt = READ_ONCE(current->maj_flt);
if (chain)
E2K_FLUSHC;
else
E2K_FLUSHR;
SET_USR_PFAULT("$.recovery_memcpy_fault");
fast_tagged_memory_copy_from_user(dst, src, size, regs,
TAGGED_MEM_STORE_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
TAGGED_MEM_LOAD_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
true);
if (RESTORE_USR_PFAULT) {
/* restore the thread flag on the error path as well */
clear_ts_flag(ts_flag);
return -EFAULT;
}
} while (unlikely(min_flt != READ_ONCE(current->min_flt) ||
maj_flt != READ_ONCE(current->maj_flt)));
clear_ts_flag(ts_flag);
return 0;
}
/*
 * Same as __copy_user_to_current_hw_stack(), but the copy is performed
 * between raw_all_irq_save() and raw_all_irq_restore().
 *
 * Returns whatever __copy_user_to_current_hw_stack() returns.
 */
static inline int copy_user_to_current_hw_stack(void *dst, void __user *src,
		unsigned long size, pt_regs_t *regs, bool chain)
{
	unsigned long irq_state;
	int err;

	raw_all_irq_save(irq_state);
	err = __copy_user_to_current_hw_stack(dst, src, size, regs, chain);
	raw_all_irq_restore(irq_state);

	return err;
}
/*
 * Copy @size bytes of stack data from user address @src to kernel address
 * @dst via host_copy_from_user_with_tags(), with TS_KERNEL_SYSCALL set for
 * the duration of the copy.
 *
 * The user range is validated with __range_ok() unless
 * host_test_intc_emul_mode(regs) is true.
 *
 * Returns 0 on success and -EFAULT if the range check or the copy fails.
 */
static inline int copy_e2k_stack_from_user(void *dst, void __user *src,
		unsigned long size, pt_regs_t *regs)
{
	unsigned long saved_ts_flag;
	int failed;

	if (likely(!host_test_intc_emul_mode(regs)) &&
	    !__range_ok((unsigned long __force) src, size, PAGE_OFFSET))
		return -EFAULT;

	saved_ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
	failed = host_copy_from_user_with_tags(dst, src, size, regs);
	clear_ts_flag(saved_ts_flag);

	if (failed)
		return -EFAULT;
	return 0;
}
/*
 * Copy @size bytes of stack data from kernel address @src to user address
 * @dst via host_copy_to_user_with_tags(), with TS_KERNEL_SYSCALL set for
 * the duration of the copy.
 *
 * The user range is validated with __range_ok() unless
 * host_test_intc_emul_mode(regs) is true.
 *
 * Returns 0 on success and -EFAULT if the range check or the copy fails.
 */
static inline int copy_e2k_stack_to_user(void __user *dst, void *src,
		unsigned long size, pt_regs_t *regs)
{
	unsigned long saved_ts_flag;
	int failed;

	if (likely(!host_test_intc_emul_mode(regs)) &&
	    !__range_ok((unsigned long __force) dst, size, PAGE_OFFSET))
		return -EFAULT;

	saved_ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
	failed = host_copy_to_user_with_tags(dst, src, size, regs);
	clear_ts_flag(saved_ts_flag);

	if (failed)
		return -EFAULT;
	return 0;
}
/*
 * Copy @copy_size bytes of a hardware stack from kernel address @src to
 * user address @dst with the tagged copy primitive.
 *
 * @dst:	  user destination address
 * @src:	  kernel source address
 * @copy_size:	  number of bytes to copy
 * @regs:	  pt_regs passed on to the copy primitive
 * @hw_stack_ind: current index of the kernel hardware stack being copied
 * @is_pcsp:	  true for the chain stack, false for the procedure stack
 *
 * Returns 0 on success, -EFAULT (after logging an error) when
 * RESTORE_USR_PFAULT reports a fault.
 */
static __always_inline int
user_hw_stack_frames_copy(void __user *dst, void *src, unsigned long copy_size,
const pt_regs_t *regs, unsigned long hw_stack_ind, bool is_pcsp)
{
unsigned long ts_flag;
/*
 * The stack is flushed only when the requested size is larger than
 * the current stack index.
 */
if (unlikely(hw_stack_ind < copy_size)) {
unsigned long flags;
raw_all_irq_save(flags);
if (is_pcsp) {
E2K_FLUSHC;
} else {
E2K_FLUSHR;
}
raw_all_irq_restore(flags);
}
SET_USR_PFAULT("$.recovery_memcpy_fault");
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
fast_tagged_memory_copy_to_user(dst, src, copy_size, regs,
TAGGED_MEM_STORE_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
TAGGED_MEM_LOAD_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT, true);
clear_ts_flag(ts_flag);
if (RESTORE_USR_PFAULT) {
pr_err("process %s (%d) %s stack could not be copied "
"from %px to %px size 0x%lx (out of memory?)\n",
current->comm, current->pid,
(is_pcsp) ? "chain" : "procedure",
src, dst, copy_size);
return -EFAULT;
}
DebugUST("copying guest %s stack spilled to host from %px "
"to guest kernel stack from %px, size 0x%lx\n",
(is_pcsp) ? "chain" : "procedure", src, dst, copy_size);
return 0;
}
/*
 * Copy one chain stack frame *@crs to the user address @u_frame via
 * host_copy_to_user(), with TS_KERNEL_SYSCALL set for the duration.
 *
 * Returns 0 on success and -EFAULT if the copy fails.
 */
static __always_inline int
user_crs_frames_copy(e2k_mem_crs_t __user *u_frame, pt_regs_t *regs,
		e2k_mem_crs_t *crs)
{
	unsigned long saved_ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
	int failed = host_copy_to_user(u_frame, crs, sizeof(*crs), regs);

	clear_ts_flag(saved_ts_flag);

	return unlikely(failed) ? -EFAULT : 0;
}
/*
 * Copy @copy_size bytes from the base of the kernel procedure stack to the
 * user procedure stack.
 *
 * The user destination is (u_psp_lo.base + u_psp_hi.ind - @u_pshtp_size),
 * the kernel source is k_psp_lo.base. When host_test_intc_emul_mode(regs)
 * is true and the matching tracepoints are enabled, the copy request is
 * traced before the copy and the individual frames after it.
 *
 * Returns the result of user_hw_stack_frames_copy().
 */
static __always_inline int user_psp_stack_copy(e2k_psp_lo_t u_psp_lo,
		e2k_psp_hi_t u_psp_hi, s64 u_pshtp_size,
		e2k_psp_lo_t k_psp_lo, e2k_psp_hi_t k_psp_hi,
		unsigned long copy_size, const pt_regs_t *regs)
{
	void __user *u_to = (void __user *) (AS(u_psp_lo).base +
			AS(u_psp_hi).ind - u_pshtp_size);
	void *k_from = (void *) AS(k_psp_lo).base;
	int err;

	if (host_test_intc_emul_mode(regs) && trace_host_copy_hw_stack_enabled())
		trace_host_copy_hw_stack(u_to, k_from, copy_size, false);

	err = user_hw_stack_frames_copy(u_to, k_from, copy_size,
			regs, k_psp_hi.PSP_hi_ind, false);

	if (host_test_intc_emul_mode(regs) && trace_host_proc_stack_frame_enabled()) {
		trace_proc_stack_frames((kernel_mem_ps_t *)u_to,
				(kernel_mem_ps_t *)k_from, copy_size,
				trace_host_proc_stack_frame);
	}

	return err;
}
/*
 * Copy @copy_size bytes from the base of the kernel chain stack to the
 * user chain stack.
 *
 * The user destination is (u_pcsp_lo.base + u_pcsp_hi.ind - @u_pcshtp_size),
 * the kernel source is k_pcsp_lo.base. When host_test_intc_emul_mode(regs)
 * is true and the matching tracepoints are enabled, the copy request is
 * traced before the copy and the individual frames after it.
 *
 * Returns the result of user_hw_stack_frames_copy().
 */
static __always_inline int user_pcsp_stack_copy(e2k_pcsp_lo_t u_pcsp_lo,
		e2k_pcsp_hi_t u_pcsp_hi, s64 u_pcshtp_size,
		e2k_pcsp_lo_t k_pcsp_lo, e2k_pcsp_hi_t k_pcsp_hi,
		unsigned long copy_size, const pt_regs_t *regs)
{
	void __user *u_to = (void __user *)(AS(u_pcsp_lo).base +
			AS(u_pcsp_hi).ind - u_pcshtp_size);
	void *k_from = (void *) AS(k_pcsp_lo).base;
	int err;

	if (host_test_intc_emul_mode(regs) && trace_host_copy_hw_stack_enabled())
		trace_host_copy_hw_stack(u_to, k_from, copy_size, true);

	err = user_hw_stack_frames_copy(u_to, k_from, copy_size,
			regs, k_pcsp_hi.PCSP_hi_ind, true);

	if (host_test_intc_emul_mode(regs) && trace_host_chain_stack_frame_enabled()) {
		trace_chain_stack_frames((e2k_mem_crs_t *)u_to,
				(e2k_mem_crs_t *)k_from, copy_size,
				trace_host_chain_stack_frame);
	}

	return err;
}
/**
 * native_user_hw_stacks_copy - copy user hardware stacks that have been
 *				SPILLed to kernel back to user space
 * @stacks - saved user stack registers
 * @regs - pt_regs of the interrupted context; regs->trap is passed to the
 *	   stack bounds handlers and regs itself to the copy helpers
 * @cur_window_q - size of current window in procedure stack,
 *		   needed only if @copy_full is not set
 * @copy_full - set if want to copy _all_ of SPILLed stacks
 *
 * This does not update stacks->pshtp and stacks->pcshtp. Main reason is
 * signals: if a signal arrives after copying then it must see a coherent
 * state where saved stacks->pshtp and stacks->pcshtp values show how much
 * data from user space is spilled to kernel space.
 *
 * Returns 0 on success (including the case when there is nothing to copy),
 * otherwise the error returned by the bounds handler or the copy helper.
 */
static __always_inline int
native_user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
trap_pt_regs_t *trap = regs->trap;
e2k_psp_lo_t u_psp_lo = stacks->psp_lo,
k_psp_lo = current_thread_info()->k_psp_lo;
e2k_psp_hi_t u_psp_hi = stacks->psp_hi;
e2k_pcsp_lo_t u_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t u_pcsp_hi = stacks->pcsp_hi;
s64 u_pshtp_size, u_pcshtp_size, ps_copy_size, pcs_copy_size;
int ret;
u_pshtp_size = GET_PSHTP_MEM_INDEX(stacks->pshtp);
u_pcshtp_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
/*
 * Copy user's part from kernel stacks into user stacks
 * Update user's stack registers
 */
if (copy_full) {
pcs_copy_size = u_pcshtp_size;
ps_copy_size = u_pshtp_size;
} else {
/* copy only the part that is left after subtracting what can be FILLed */
pcs_copy_size = get_pcs_copy_size(u_pcshtp_size);
ps_copy_size = get_ps_copy_size(cur_window_q, u_pshtp_size);
/* Make sure there is enough space in CF for the FILL */
BUG_ON((E2K_MAXCR_q - 4) * 16 < E2K_CF_MAX_FILL);
}
/* common case: nothing has to be copied manually */
if (likely(pcs_copy_size <= 0 && ps_copy_size <= 0))
return 0;
if (unlikely(pcs_copy_size > 0)) {
e2k_pcsp_hi_t k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
/* Since not all user data has been SPILL'ed it is possible
 * that we have already overflown user's hardware stack. */
if (unlikely(AS(u_pcsp_hi).ind > AS(u_pcsp_hi).size)) {
ret = handle_chain_stack_bounds(stacks, trap);
if (unlikely(ret)) {
pr_warning("process %s (%d) chain stack overflow (out of memory?)\n",
current->comm, current->pid);
return ret;
}
/* reload the registers after the bounds handler has run */
u_pcsp_lo = stacks->pcsp_lo;
u_pcsp_hi = stacks->pcsp_hi;
}
ret = user_pcsp_stack_copy(u_pcsp_lo, u_pcsp_hi, u_pcshtp_size,
k_pcsp_lo, k_pcsp_hi, pcs_copy_size, regs);
if (ret)
return ret;
}
if (unlikely(ps_copy_size > 0)) {
e2k_psp_hi_t k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
/* Since not all user data has been SPILL'ed it is possible
 * that we have already overflowed user's hardware stack. */
if (unlikely(AS(u_psp_hi).ind > AS(u_psp_hi).size)) {
ret = handle_proc_stack_bounds(stacks, trap);
if (unlikely(ret)) {
pr_warning("process %s (%d) procedure stack overflow (out of memory?)\n",
current->comm, current->pid);
return ret;
}
/* reload the registers after the bounds handler has run */
u_psp_lo = stacks->psp_lo;
u_psp_hi = stacks->psp_hi;
}
ret = user_psp_stack_copy(u_psp_lo, u_psp_hi, u_pshtp_size,
k_psp_lo, k_psp_hi, ps_copy_size, regs);
if (ret)
return ret;
}
return 0;
}
/*
 * Remove the user part recorded in @stacks->pcshtp and @stacks->pshtp from
 * the bottom of the current kernel hardware stacks by moving the data above
 * it down to the stack bases.
 *
 * One chain frame (SZ_OF_CR bytes) is kept at the bottom of the chain
 * stack when at least that much was spilled. On return stacks->pcshtp is
 * set to SZ_OF_CR if the chain stack was collapsed, and the index of
 * stacks->pshtp is set to 0 if the procedure stack was collapsed.
 *
 * The task is killed with do_exit(SIGKILL) if less than one chain frame
 * is recorded as spilled while the user chain stack index is not zero.
 */
static inline void collapse_kernel_hw_stacks(struct e2k_stacks *stacks)
{
e2k_pcsp_lo_t k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_psp_lo_t k_psp_lo = current_thread_info()->k_psp_lo;
unsigned long flags, spilled_pc_size, spilled_p_size;
e2k_pshtp_t pshtp = stacks->pshtp;
u64 *dst;
const u64 *src;
spilled_pc_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
spilled_p_size = GET_PSHTP_MEM_INDEX(pshtp);
DebugUST("guest user spilled to host kernel stack part: chain 0x%lx "
"procedure 0x%lx\n",
spilled_pc_size, spilled_p_size);
/* When user tries to return from the last user frame
 * we will have pcshtp = pcsp_hi.ind = 0. But situation
 * with pcsp_hi.ind != 0 and pcshtp = 0 is impossible. */
if (WARN_ON_ONCE(spilled_pc_size < SZ_OF_CR &&
AS(stacks->pcsp_hi).ind != 0))
do_exit(SIGKILL);
/* Keep the last user frame (see user_hw_stacks_copy_full()) */
if (spilled_pc_size >= SZ_OF_CR) {
spilled_pc_size -= SZ_OF_CR;
DebugUST("Keep the prev user chain frame, so spilled chain "
"size is now 0x%lx\n",
spilled_pc_size);
}
raw_all_irq_save(flags);
if (spilled_pc_size) {
/* move everything above the spilled part down to the stack base */
dst = (u64 *) AS(k_pcsp_lo).base;
src = (u64 *) (AS(k_pcsp_lo).base + spilled_pc_size);
collapse_kernel_pcs(dst, src, spilled_pc_size);
stacks->pcshtp = SZ_OF_CR;
apply_graph_tracer_delta(-spilled_pc_size);
}
if (spilled_p_size) {
/* move everything above the spilled part down to the stack base */
dst = (u64 *) AS(k_psp_lo).base;
src = (u64 *) (AS(k_psp_lo).base + spilled_p_size);
collapse_kernel_ps(dst, src, spilled_p_size);
AS(pshtp).ind = 0;
stacks->pshtp = pshtp;
}
raw_all_irq_restore(flags);
}
/**
 * native_user_hw_stacks_prepare - prepare user hardware stacks that have
 *				   been SPILLed to kernel back to user space
 * @stacks - saved user stack registers
 * @regs - pt_regs of the interrupted context
 * @cur_window_q - size of current window in procedure stack
 * @from - caller identification; FROM_PV_VCPU_MODE must not be set
 * @syscall - true if called upon direct system call exit (no signal handlers)
 *
 * This does two things:
 *
 * 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
 *   - user signal handler had pcshtp==0x20 before return to sigreturn()
 *   - user context had pcshtp==0x20 before return to makecontext_trampoline()
 *   - chain stack underflow happened
 * So it is possible in sigreturn() and traps, but not in system calls.
 * If we are using the trick with return to FILL user hardware stacks then
 * we must have frame in chain stack to return to. So in this case kernel's
 * chain stack is moved up by one frame (0x20 bytes).
 * We also fill the new frame with actual user data and update stacks->pcshtp,
 * this is needed to keep the coherent state where saved stacks->pcshtp values
 * shows how much data from user space has been spilled to kernel space.
 *
 * 2) It is not possible to always FILL all of user data that have been
 * SPILLed to kernel stacks. So we manually copy the leftovers that can
 * not be FILLed to user space.
 * This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
 * is signals: if a signal arrives after copying then it must see a coherent
 * state where saved stacks->pshtp and stacks->pcshtp values show how much
 * data from user space has been spilled to kernel space.
 *
 * On failure of step 1 SIGKILL is sent to the current task and the function
 * returns; on failure of step 2 the task exits via do_exit(SIGKILL).
 */
static __always_inline void native_user_hw_stacks_prepare(
struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
e2k_pcshtp_t u_pcshtp = stacks->pcshtp;
int ret;
BUG_ON(from & FROM_PV_VCPU_MODE);
/*
 * 1) Make sure there is free space in kernel chain stack to return to
 */
if (!syscall && u_pcshtp == 0) {
unsigned long flags;
e2k_pcsp_lo_t u_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t u_pcsp_hi = stacks->pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *u_cframe;
e2k_mem_crs_t *k_crs;
u64 u_cbase;
/*
 * Note: this 'ret' shadows the function-level one. It stays -EINVAL
 * when no user frame is available below the current one.
 */
int ret = -EINVAL;
raw_all_irq_save(flags);
E2K_FLUSHC;
k_pcsp_hi = READ_PCSP_HI_REG();
BUG_ON(AS(k_pcsp_hi).ind);
/* make room for one frame at the bottom of the kernel chain stack */
AS(k_pcsp_hi).ind += SZ_OF_CR;
WRITE_PCSP_HI_REG(k_pcsp_hi);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
u_cframe = (e2k_mem_crs_t __user *) (AS(u_pcsp_lo).base +
AS(u_pcsp_hi).ind);
u_cbase = ((from & FROM_RETURN_PV_VCPU_TRAP) ||
host_test_intc_emul_mode(regs)) ?
u_pcsp_lo.PCSP_lo_base :
(u64) CURRENT_PCS_BASE();
/* copy the previous user frame only if it lies above the stack base */
if ((u64) u_cframe > u_cbase) {
ret = __copy_user_to_current_hw_stack(k_crs,
u_cframe - 1, sizeof(*k_crs), regs, true);
}
raw_all_irq_restore(flags);
/* Can happen if application returns until runs out of
 * chain stack or there is no free memory for stacks.
 * There is no user stack to return to - die. */
if (ret) {
SIGDEBUG_PRINT("SIGKILL. %s\n",
(ret == -EINVAL) ? "tried to return to kernel" :
"ran into Out-of-Memory on user stacks");
force_sig(SIGKILL);
return;
}
if (AS(u_pcsp_hi).ind < SZ_OF_CR) {
update_pcsp_regs(AS(u_pcsp_lo).base,
&u_pcsp_lo, &u_pcsp_hi);
stacks->pcsp_lo = u_pcsp_lo;
stacks->pcsp_hi = u_pcsp_hi;
BUG_ON(AS(u_pcsp_hi).ind < SZ_OF_CR);
}
/* one user frame is now held in the kernel chain stack */
u_pcshtp = SZ_OF_CR;
stacks->pcshtp = u_pcshtp;
}
/*
 * 2) Copy user data that cannot be FILLed
 */
ret = native_user_hw_stacks_copy(stacks, regs, cur_window_q, false);
if (unlikely(ret))
do_exit(SIGKILL);
}
#ifndef CONFIG_VIRTUALIZATION
/* native kernel without virtualization support */
/* Forwards to native_user_hw_stacks_copy() with the same arguments */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
return native_user_hw_stacks_copy(stacks, regs, cur_window_q, copy_full);
}
/* Forwards to native_user_hw_stacks_prepare() with the same arguments */
static __always_inline void
host_user_hw_stacks_prepare(struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
native_user_hw_stacks_prepare(stacks, regs, cur_window_q,
from, syscall);
}
#elif defined(CONFIG_KVM_GUEST_KERNEL)
/* It is native guest kernel (without paravirtualization) */
#include <asm/kvm/guest/copy-hw-stacks.h>
#elif defined(CONFIG_PARAVIRT_GUEST)
/* It is paravirtualized kernel (host and guest) */
#include <asm/paravirt/copy-hw-stacks.h>
#elif defined(CONFIG_KVM_HOST_MODE)
/* It is host kernel with virtualization support */
#include <asm/kvm/copy-hw-stacks.h>
#else /* unknown mode */
#error "unknown virtualization mode"
#endif /* !CONFIG_VIRTUALIZATION */
/**
 * do_user_hw_stacks_copy_full - copy part of user stacks that was SPILLed
 *				 into kernel back to user stacks.
 * @stacks - saved user stack registers
 * @regs - pt_regs pointer
 * @crs - if not NULL, the saved %cr frame is copied to user space as well
 *
 * When @crs is not NULL one more chain frame is written at the current top
 * of the user chain stack (pcsp_lo.base + pcsp_hi.ind). Note that
 * 'stacks->pcsp_hi.ind' is _not_ updated after copying since it would leave
 * stack in inconsistent state (with two copies of the same frame), this is
 * left to the caller.
 *
 * NOTE(review): the frame actually written is &regs->crs, @crs is only
 * tested against NULL - confirm that every caller passes &regs->crs.
 *
 * Inlining this reduces the amount of memory to copy in
 * collapse_kernel_hw_stacks().
 *
 * Returns 0 on success or the error of the failed copy step.
 */
static inline int do_user_hw_stacks_copy_full(struct e2k_stacks *stacks,
		pt_regs_t *regs, e2k_mem_crs_t *crs)
{
	e2k_mem_crs_t __user *u_top;
	int err;

	/* Step 1: move everything that was SPILLed into kernel to user */
	err = user_hw_stacks_copy(stacks, regs, 0, true);
	if (unlikely(err))
		return err;

	/*
	 * Step 2: nothing is left to FILL, so close the hole in the kernel
	 * stacks.
	 *
	 * IMPORTANT: there is always at least one user frame at the top of
	 * kernel stack - the one that issued a system call (in case of an
	 * exception this rule is upheld manually, see
	 * user_hw_stacks_prepare()). This ABI is kept and space for one user
	 * frame is _always_ left, so that a later FILL can use the return
	 * trick (otherwise there would be no space in chain stack for it).
	 */
	collapse_kernel_hw_stacks(stacks);

	/*
	 * Step 3: copy the saved %cr registers if requested.
	 *
	 * Caller must take care of filling of resulting hole
	 * (last user frame from pcshtp == SZ_OF_CR).
	 */
	if (!crs)
		return 0;

	u_top = (void __user *) (AS(stacks->pcsp_lo).base +
				 AS(stacks->pcsp_hi).ind);

	return user_crs_frames_copy(u_top, regs, &regs->crs);
}
#endif /* _E2K_COPY_HW_STACKS_H */

View File

@ -2241,6 +2241,7 @@ typedef union e2k_fpsr {
#define FPSR_reg word
typedef union {
u32 half_word[2];
struct {
u32 user : 1;
u32 system : 1;

View File

@ -1038,19 +1038,26 @@ _Pragma("no_asm_inline") \
: clobbers); \
})
#define NATIVE_EXIT_HANDLE_SYSCALL(sbr, usd_hi, usd_lo, upsr) \
({ \
asm volatile ("{rwd %0, %%sbr}" \
"{rwd %1, %%usd.hi}" \
asm volatile (ALTERNATIVE_1_ALTINSTR \
/* CPU_HWBUG_USD_ALIGNMENT version */ \
"{rwd %0, %%sbr;" \
" nop}" \
ALTERNATIVE_2_OLDINSTR \
/* Default version */ \
"{rwd %0, %%sbr}" \
ALTERNATIVE_3_FEATURE(%[facility]) \
"{rwd %2, %%usd.lo}" \
"{rwd %1, %%usd.hi}" \
"{rws %3, %%upsr;" \
" nop 4}\n" \
: \
: "ri" ((__e2k_u64_t) (sbr)), \
"ri" ((__e2k_u64_t) (usd_hi)), \
"ri" ((__e2k_u64_t) (usd_lo)), \
"ri" ((__e2k_u32_t) (upsr))); \
"ri" ((__e2k_u32_t) (upsr)), \
[facility] "i" (CPU_HWBUG_USD_ALIGNMENT)); \
})
@ -1093,6 +1100,15 @@ _Pragma("no_asm_inline") \
: "r" ((__e2k_u64_t) (val))); \
})
/*
 * Write @val (converted to u64) to the MMU register named by @reg_mnemonic
 * with an "mmurw" operation; the same instruction bundle carries
 * "nop <nop>".
 * NOTE(review): the nop count presumably provides a delay required around
 * the register write - confirm against the callers.
 */
#define NATIVE_SET_MMUREG_CLOSED(reg_mnemonic, val, nop) \
({ \
asm volatile ("{nop " #nop "\n" \
" mmurw %0, %%" #reg_mnemonic "}" \
: \
: "r" ((u64) (val))); \
})
#define NATIVE_TAGGED_LOAD_TO_MMUREG(reg_mnemonic, _addr) \
do { \
unsigned long long _tmp; \

View File

@ -23,8 +23,6 @@
#include <asm/e2k.h>
#include <asm/pgtable_def.h>
#define CHK_DEBUGGER(trapnr, signr, error_code, address, regs, after)
#define IS_KERNEL_THREAD(task, mm) \
({ \
e2k_addr_t ps_base; \
@ -248,6 +246,8 @@ host_ftrace_dump(void)
{
return;
}
static const bool kvm_debug = false;
#else /* CONFIG_VIRTUALIZATION */
/* it is native host kernel with virtualization support */
/* or it is paravirtualized host/guest kernel */

View File

@ -11,6 +11,8 @@
#ifndef __ASSEMBLY__
void do_sic_error_interrupt(void);
static inline bool cpu_has_epic(void)
{
if (cpu_has(CPU_FEAT_EPIC))

View File

@ -35,18 +35,12 @@ extern int hw_breakpoint_exceptions_notify(
extern void hw_breakpoint_pmu_read(struct perf_event *bp);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
extern int bp_data_overflow_handle(struct pt_regs *regs);
extern int bp_instr_overflow_handle(struct pt_regs *regs);
extern void bp_data_overflow_handle(struct pt_regs *regs);
extern void bp_instr_overflow_handle(struct pt_regs *regs);
extern void clear_ptrace_hw_breakpoint(struct task_struct *tsk);
#else /* ! CONFIG_HAVE_HW_BREAKPOINT */
static inline int bp_data_overflow_handle(struct pt_regs *regs)
{
return 0;
}
static inline int bp_instr_overflow_handle(struct pt_regs *regs)
{
return 0;
}
static inline void bp_data_overflow_handle(struct pt_regs *regs) { }
static inline void bp_instr_overflow_handle(struct pt_regs *regs) { }
static inline void clear_ptrace_hw_breakpoint(struct task_struct *tsk) {}
#endif /* CONFIG_HAVE_HW_BREAKPOINT */

View File

@ -61,18 +61,14 @@ static inline int is_kprobe_break1_trap(struct pt_regs *regs)
return *instr == KPROBE_BREAK_1;
}
extern int kprobe_instr_debug_handle(struct pt_regs *);
extern void kprobe_instr_debug_handle(struct pt_regs *);
#else
static inline int is_kprobe_break1_trap(struct pt_regs *regs)
{
return false;
}
static inline int kprobe_instr_debug_handle(struct pt_regs *regs)
{
return 0;
}
static inline void kprobe_instr_debug_handle(struct pt_regs *regs) { }
#endif /* #ifdef CONFIG_KPROBES */
#ifdef CONFIG_KRETPROBES

View File

@ -0,0 +1,462 @@
/*
* KVM guest kernel processes support
* Copyright 2011 Salavat S. Guiliazov (atic@mcst.ru)
*/
#ifndef _E2K_KVM_COPY_HW_STACKS_H
#define _E2K_KVM_COPY_HW_STACKS_H
#include <linux/kvm_host.h>
#include <asm/cpu_regs.h>
#include <asm/regs_state.h>
#include <asm/kvm/thread_info.h>
#include <asm/kvm/mmu.h>
#include <asm/kvm/page.h>
#include <asm/kvm/switch.h>
#undef DEBUG_KVM_GUEST_STACKS_MODE
#undef DebugGUST
#define DEBUG_KVM_GUEST_STACKS_MODE 0 /* guest user stacks */
/* copy debug */
/*
 * Debug print helper: emits a pr_info() message prefixed with the name of
 * the calling function when DEBUG_KVM_GUEST_STACKS_MODE is non-zero.
 * Written as a statement expression.
 */
#define DebugGUST(fmt, args...) \
({ \
if (DEBUG_KVM_GUEST_STACKS_MODE) \
pr_info("%s(): " fmt, __func__, ##args); \
})
#ifdef CONFIG_KVM_HOST_MODE
/*
 * Fill regs->g_stacks for a later injection into the guest, unless this
 * has already been done (regs->g_stacks_valid).
 *
 * The data stack (usd, top) and hardware stack (psp, pcsp) registers are
 * taken from the gthread_info of the VCPU; pshtp and pcshtp are taken from
 * regs->stacks, i.e. from the guest user stack state saved upon the
 * trap/system call.
 *
 * On return g_stacks_valid is set, g_stacks_active and need_inject are
 * cleared.
 */
static inline void
prepare_pv_vcpu_inject_stacks(struct kvm_vcpu *vcpu, pt_regs_t *regs)
{
	gthread_info_t *gti = pv_vcpu_get_gti(vcpu);
	e2k_stacks_t *inject = &regs->g_stacks;
	e2k_stacks_t *trapped = &regs->stacks;

	/* already prepared */
	if (regs->g_stacks_valid)
		return;

	/*
	 * All stacks are taken in their empty state: recursion of guest
	 * user traps/system calls is not possible here.
	 */
	inject->usd_lo = gti->g_usd_lo;
	inject->usd_hi = gti->g_usd_hi;
	inject->top = gti->g_sbr.SBR_base;
	inject->psp_lo = gti->g_psp_lo;
	inject->psp_hi = gti->g_psp_hi;
	inject->pcsp_lo = gti->g_pcsp_lo;
	inject->pcsp_hi = gti->g_pcsp_hi;

	/* pshtp & pcshtp reflect the real guest user state upon trap/syscall */
	inject->pshtp = trapped->pshtp;
	inject->pcshtp = trapped->pcshtp;

	regs->g_stacks_valid = true;
	regs->g_stacks_active = false;
	regs->need_inject = false;
}
/*
 * EMULATE_EMPTY_CHAIN_STACK is a debug-only switch. It is undefined here,
 * so the empty stub at the bottom is the variant that gets compiled.
 */
#undef EMULATE_EMPTY_CHAIN_STACK /* only to debug */
#ifdef EMULATE_EMPTY_CHAIN_STACK
/*
 * Debug variant: for a guest user context with stacks->pcshtp <= 0x40,
 * copy the whole spilled chain part from the kernel chain stack base to the
 * guest chain stack, then drop it from the kernel chain stack and set
 * pcshtp to 0 in both @stacks and regs->stacks.
 * A failed copy is treated as a BUG.
 */
static __always_inline void
pv_vcpu_emulate_empty_chain_staks(struct kvm_vcpu *vcpu, pt_regs_t *regs,
e2k_stacks_t *stacks, bool guest_user)
{
e2k_pcshtp_t pcshtp;
unsigned long flags;
e2k_pcsp_lo_t g_pcsp_lo, k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *g_cframe;
e2k_mem_crs_t *k_crs;
int ret;
pcshtp = stacks->pcshtp;
/* only guest user contexts with at most 0x40 spilled bytes are handled */
if (!(guest_user && pcshtp <= 0x40))
return;
g_pcsp_lo = regs->stacks.pcsp_lo;
g_pcsp_hi = regs->stacks.pcsp_hi;
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
k_pcsp_lo = NATIVE_NV_READ_PCSP_LO_REG();
/* the kernel chain stack must hold exactly the spilled part */
BUG_ON(AS(k_pcsp_hi).ind != pcshtp);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
g_cframe = (e2k_mem_crs_t __user *) (AS(g_pcsp_lo).base +
AS(g_pcsp_hi).ind - pcshtp);
ret = user_hw_stack_frames_copy(g_cframe, k_crs, pcshtp, regs,
k_pcsp_hi.PCSP_hi_ind, true);
if (ret) {
pr_err("%s(): copy to user stack failed\n", __func__);
BUG_ON(true);
}
/* the kernel chain stack no longer holds any spilled data */
k_pcsp_hi.PCSP_hi_ind -= pcshtp;
pcshtp = 0;
regs->stacks.pcshtp = pcshtp;
stacks->pcshtp = pcshtp;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
raw_all_irq_restore(flags);
}
#else /* !EMULATE_EMPTY_CHAIN_STACK */
/* Regular variant: does nothing */
static __always_inline void
pv_vcpu_emulate_empty_chain_staks(struct kvm_vcpu *vcpu, pt_regs_t *regs,
e2k_stacks_t *stacks, bool guest_user)
{
}
#endif /* EMULATE_EMPTY_CHAIN_STACK */
/**
 * pv_vcpu_user_hw_stacks_copy - check size of user hardware stacks that have
 *				 been SPILLed to kernel back to guest space
 * @regs - saved guest user stack registers
 * @stacks - guest stack registers (PSP/PCSP/PSHTP/PCSHTP) that describe
 *	     where the data is copied to
 * @cur_window_q - size of current window in procedure stack
 * @guest_user - if true, the stack indexes in @stacks are advanced by the
 *		 copied size; if false, the destination is moved back by the
 *		 PSHTP/PCSHTP size because the index was already incremented
 *
 * All guest user's stacks part were already copied to guest kernel stacks,
 * so it need only check that it was full size and nothing to copy here.
 * If get_pcs_copy_size()/get_ps_copy_size() still report a positive size,
 * that part is copied from the base of the host kernel stack to the guest
 * stack with user_hw_stack_frames_copy().
 *
 * Returns 0 on success (also when there is nothing to copy), otherwise the
 * error returned by user_hw_stack_frames_copy().
 */
static __always_inline int
pv_vcpu_user_hw_stacks_copy(pt_regs_t *regs, e2k_stacks_t *stacks,
u64 cur_window_q, bool guest_user)
{
e2k_psp_lo_t g_psp_lo = stacks->psp_lo,
k_psp_lo = current_thread_info()->k_psp_lo;
e2k_psp_hi_t g_psp_hi = stacks->psp_hi;
e2k_pcsp_lo_t g_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi = stacks->pcsp_hi;
s64 g_pshtp_size, g_pcshtp_size, ps_copy_size, pcs_copy_size;
int ret;
DebugUST("guest kernel chain state: base 0x%llx ind 0x%x size 0x%x\n",
g_pcsp_lo.PCSP_lo_base, g_pcsp_hi.PCSP_hi_ind,
g_pcsp_hi.PCSP_hi_size);
DebugUST("guest kernel proc state: base 0x%llx ind 0x%x size 0x%x\n",
g_psp_lo.PSP_lo_base, g_psp_hi.PSP_hi_ind,
g_psp_hi.PSP_hi_size);
g_pshtp_size = GET_PSHTP_MEM_INDEX(stacks->pshtp);
g_pcshtp_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
DebugUST("guest kernel chain stack PCSHTP 0x%llx, "
"proc stack PSHTP 0x%llx cur window 0x%llx\n",
g_pcshtp_size, g_pshtp_size, cur_window_q);
/*
* FIXME: the current implementation of the guest user signal handler
* injection uses direct copying to guest hardware stacks.
* It is bad decision, needs to be corrected
KVM_BUG_ON(is_paging(current_thread_info()->vcpu) &&
(g_psp_lo.PSP_lo_base < GUEST_TASK_SIZE ||
g_pcsp_lo.PCSP_lo_base < GUEST_TASK_SIZE));
*/
/*
* Calculate size of user's part to copy from kernel stacks
* into guest kernel stacks
*/
pcs_copy_size = get_pcs_copy_size(g_pcshtp_size);
ps_copy_size = get_ps_copy_size(cur_window_q, g_pshtp_size);
/* Make sure there is enough space in CF for the FILL */
BUG_ON((E2K_MAXCR_q - 4) * 16 < E2K_CF_MAX_FILL);
DebugUST("to copy chain stack 0x%llx, proc stack 0x%llx\n",
pcs_copy_size, ps_copy_size);
/* common case: neither stack has a part that must be copied by hand */
if (likely(pcs_copy_size <= 0 && ps_copy_size <= 0))
return 0;
if (unlikely(pcs_copy_size > 0)) {
e2k_pcsp_hi_t k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
void __user *dst;
void *src;
/* Since SPILL'ed guest user data will be copyed to guest */
/* kernel stacks then cannot be any overflow of user's */
/* hardware stack. */
if (unlikely(AS(g_pcsp_hi).ind > AS(g_pcsp_hi).size)) {
pr_err("%s(): guest kernel chain stack overflow "
"(out of memory?): ind 0x%x size 0x%x\n",
__func__, g_pcsp_hi.PCSP_hi_ind,
g_pcsp_hi.PCSP_hi_size);
KVM_BUG_ON(true);
}
/* destination is the current top (base + ind) of guest chain stack */
dst = (void __user *)(g_pcsp_lo.PCSP_lo_base +
g_pcsp_hi.PCSP_hi_ind);
if (!guest_user) {
/* stack index has been incremented on PCSHTP */
dst -= g_pcshtp_size;
}
/* source is the base of the host kernel chain stack of this thread */
src = (void *)k_pcsp_lo.PCSP_lo_base;
if (trace_host_copy_hw_stack_enabled())
trace_host_copy_hw_stack(dst, src, pcs_copy_size, true);
ret = user_hw_stack_frames_copy(dst, src, pcs_copy_size, regs,
k_pcsp_hi.PCSP_hi_ind, true);
/* frames are traced before ret is checked, so also after a failed copy */
if (trace_host_chain_stack_frame_enabled())
trace_chain_stack_frames((e2k_mem_crs_t *)dst,
(e2k_mem_crs_t *)src, pcs_copy_size,
trace_host_chain_stack_frame);
if (ret)
return ret;
if (guest_user) {
/* account the copied frames in the saved guest chain stack index */
g_pcsp_hi.PCSP_hi_ind += pcs_copy_size;
stacks->pcsp_hi = g_pcsp_hi;
DebugGUST("guest user chain stack frames copied from "
"host %px to guest kernel from %px size 0x%llx "
"PCSP.ind 0x%x\n",
src, dst, pcs_copy_size, g_pcsp_hi.PCSP_hi_ind);
}
}
if (unlikely(ps_copy_size > 0)) {
e2k_psp_hi_t k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
void __user *dst;
void *src;
/* Since SPILL'ed guest user data will be copyed to guest */
/* kernel stacks then cannot be any overflow of user's */
/* hardware stack. */
if (unlikely(AS(g_psp_hi).ind > AS(g_psp_hi).size)) {
pr_err("%s(): guest kernel proc stack overflow "
"(out of memory?): ind 0x%x size 0x%x\n",
__func__, g_psp_hi.PSP_hi_ind,
g_psp_hi.PSP_hi_size);
KVM_BUG_ON(true);
}
/* destination is the current top (base + ind) of guest procedure stack */
dst = (void __user *)(g_psp_lo.PSP_lo_base +
g_psp_hi.PSP_hi_ind);
if (!guest_user) {
/* stack index has been incremented on PSHTP */
dst -= g_pshtp_size;
}
/* source is the base of the host kernel procedure stack of this thread */
src = (void *)k_psp_lo.PSP_lo_base;
if (trace_host_copy_hw_stack_enabled())
trace_host_copy_hw_stack(dst, src, ps_copy_size, false);
ret = user_hw_stack_frames_copy(dst, src, ps_copy_size, regs,
k_psp_hi.PSP_hi_ind, false);
/* frames are traced before ret is checked, so also after a failed copy */
if (trace_host_proc_stack_frame_enabled())
trace_proc_stack_frames((kernel_mem_ps_t *)dst,
(kernel_mem_ps_t *)src, ps_copy_size,
trace_host_proc_stack_frame);
if (ret)
return ret;
if (guest_user) {
/* account the copied frames in the saved guest procedure stack index */
g_psp_hi.PSP_hi_ind += ps_copy_size;
stacks->psp_hi = g_psp_hi;
DebugGUST("guest user proc stack frames copied from "
"host %px to guest kernel from %px size 0x%llx "
"PSP.ind 0x%x\n",
src, dst, ps_copy_size, g_psp_hi.PSP_hi_ind);
}
}
return 0;
}
/**
 * pv_vcpu_user_hw_stacks_prepare - prepare guest user hardware stacks
 *				    that have been SPILLed to kernel back
 *				    to guest user space
 * @vcpu - virtual CPU the trap/syscall is handled for
 * @regs - saved guest user stack registers
 * @cur_window_q - size of current window in procedure stack
 * @from - caller kind; if FROM_PV_VCPU_MODE is set and the stacks belong to
 *	   guest user, nothing is done here
 * @syscall - true if called upon direct system call exit (no signal handlers)
 *
 * This does two things:
 *
 * 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
 *   - user signal handler had pcshtp==0x20 before return to sigreturn()
 *   - user context had pcshtp==0x20 before return to makecontext_trampoline()
 *   - chain stack underflow happened
 * So it is possible in sigreturn() and traps, but not in system calls.
 * If we are using the trick with return to FILL user hardware stacks then
 * we must have frame in chain stack to return to. So in this case kernel's
 * chain stack is moved up by one frame (0x20 bytes).
 * We also fill the new frame with actual user data and update stacks->pcshtp,
 * this is needed to keep the coherent state where saved stacks->pcshtp values
 * shows how much data from user space has been spilled to kernel space.
 *
 * 2) It is not possible to always FILL all of user data that have been
 * SPILLed to kernel stacks. So we manually copy the leftovers that can
 * not be FILLed to user space.
 * This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
 * is signals: if a signal arrives after copying then it must see a coherent
 * state where saved stacks->pshtp and stacks->pcshtp values show how much
 * data from user space has been spilled to kernel space.
 */
static __always_inline void
pv_vcpu_user_hw_stacks_prepare(struct kvm_vcpu *vcpu, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
e2k_stacks_t *stacks;
e2k_pcshtp_t pcshtp;
bool guest_user;
bool paging = is_paging(vcpu);
int ret;
/* without paging the stacks are always handled as guest kernel ones */
if (likely(paging)) {
guest_user = !!(syscall || !pv_vcpu_trap_on_guest_kernel(regs));
} else {
guest_user = false;
}
if (guest_user) {
if (from & FROM_PV_VCPU_MODE) {
/* all preparation has been made */
/* by host & guest handler */
return;
}
/* trap on/syscall from guest user, so regs keeps user */
/* registers state and it need use guest kernel stacks */
/* in empty state to handle this trap/syscall */
if (!regs->g_stacks_valid) {
prepare_pv_vcpu_inject_stacks(vcpu, regs);
}
stacks = &regs->g_stacks;
} else {
/* trap on guest kernel, so regs already points to guest */
/* kernel stacks and trap will be handled by host */
/* same as other user's processes traps */
stacks = &regs->stacks;
}
/* only to debug on simulator : pcshtp == 0 */
pv_vcpu_emulate_empty_chain_staks(vcpu, regs, stacks, guest_user);
pcshtp = stacks->pcshtp;
DebugUST("guest kernel chain stack state: base 0x%llx ind 0x%x "
"size 0x%x\n",
stacks->pcsp_lo.PCSP_lo_base,
stacks->pcsp_hi.PCSP_hi_ind,
stacks->pcsp_hi.PCSP_hi_size);
DebugUST("host kernel chain stack state: base 0x%llx ind 0x%x "
"size 0x%x\n",
NATIVE_NV_READ_PCSP_LO_REG().PCSP_lo_base,
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_size);
DebugUST("guest kernel chain stack size to fill PCSHTP 0x%x\n",
pcshtp);
/*
* 1) Make sure there is free space in kernel chain stack to return to
*/
if (!syscall && pcshtp == 0 && !guest_user) {
unsigned long flags;
e2k_pcsp_lo_t g_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi = stacks->pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *g_cframe;
e2k_mem_crs_t *k_crs;
/* shadows the outer ret; stays -EINVAL if no guest frame gets copied */
int ret = -EINVAL;
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
/* host kernel chain stack is expected to be empty at this point */
BUG_ON(AS(k_pcsp_hi).ind);
/* reserve one chain frame in the host kernel chain stack */
AS(k_pcsp_hi).ind += SZ_OF_CR;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
g_cframe = (e2k_mem_crs_t __user *) (AS(g_pcsp_lo).base +
AS(g_pcsp_hi).ind);
/* copy only if the guest chain stack has something above its base */
if ((u64) g_cframe > (u64) AS(g_pcsp_lo).base) {
ret = __copy_user_to_current_hw_stack(k_crs,
g_cframe - 1, sizeof(*k_crs), regs, true);
}
raw_all_irq_restore(flags);
/* Can happen if application returns until runs out of
* chain stack or there is no free memory for stacks.
* There is no user stack to return to - die. */
if (ret) {
E2K_LMS_HALT_OK;
pr_err("%s(): SIGKILL. %s\n",
__func__,
(ret == -EINVAL) ?
"tried to return to kernel"
:
"ran into Out-of-Memory on user stacks");
force_sig(SIGKILL);
return;
}
DebugUST("copy guest user chain frame from %px to kernel "
"bottom from %px\n",
g_cframe - 1, k_crs);
/* NOTE(review): this underflow check runs after the frame was copied */
if (AS(g_pcsp_hi).ind < SZ_OF_CR) {
pr_err("%s(): guest kernel chain stack underflow\n",
__func__);
KVM_BUG_ON(true);
}
/* record that one chain frame is now held in the kernel chain stack */
pcshtp = SZ_OF_CR;
stacks->pcshtp = pcshtp;
DebugUST("guest kernel chain stack to FILL PCSHTP "
"set to 0x%x\n",
stacks->pcshtp);
} else if (!syscall && pcshtp == 0 && guest_user) {
e2k_pcsp_hi_t k_pcsp_hi;
unsigned long flags;
/* set flag for unconditional injection to do not copy */
/* from guest user space */
regs->need_inject = true;
/* reserve one bottom frames for trampoline */
/* the guest handler replaces guest user trapped frame */
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
BUG_ON(k_pcsp_hi.PCSP_hi_ind);
k_pcsp_hi.PCSP_hi_ind += 1 * SZ_OF_CR;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
raw_all_irq_restore(flags);
}
/*
* 2) Copy user data that cannot be FILLed
*/
ret = pv_vcpu_user_hw_stacks_copy(regs, stacks, cur_window_q,
guest_user);
/* a failed copy terminates the current task */
if (unlikely(ret))
do_exit(SIGKILL);
}
/*
 * Same as for native kernel without virtualization support:
 * the work is fully delegated to native_user_hw_stacks_copy().
 */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
		pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
	int ret;

	ret = native_user_hw_stacks_copy(stacks, regs, cur_window_q, copy_full);

	return ret;
}
/*
 * host_user_hw_stacks_prepare - prepare hardware stacks on the host
 * @stacks: saved user stack registers
 * @regs: saved registers of the trap/system call
 * @cur_window_q: size of current window in procedure stack
 * @from: caller kind, passed through unchanged
 * @syscall: non-zero if called upon system call exit
 *
 * If the intercept emulation flag is not set in @regs, this is a trap on or
 * a system call from a host user process and the native preparation is used.
 * Otherwise the paravirtualized VCPU variant is called for the VCPU of the
 * current thread.
 */
static __always_inline void
host_user_hw_stacks_prepare(struct e2k_stacks *stacks, pt_regs_t *regs,
		u64 cur_window_q, enum restore_caller from, int syscall)
{
	struct kvm_vcpu *vcpu;

	if (likely(!kvm_test_intc_emul_flag(regs))) {
		/* trap on/syscall from host user processes */
		/*
		 * Call and return separately: a return statement with an
		 * expression is not allowed in a function returning void.
		 */
		native_user_hw_stacks_prepare(stacks, regs,
				cur_window_q, from, syscall);
		return;
	}

	vcpu = current_thread_info()->vcpu;
	KVM_BUG_ON(vcpu == NULL);
	pv_vcpu_user_hw_stacks_prepare(vcpu, regs, cur_window_q, from, syscall);
}
#endif /* CONFIG_KVM_HOST_MODE */
#ifdef CONFIG_KVM_GUEST_KERNEL
/* it is native guest kernel */
#include <asm/kvm/guest/copy-hw-stacks.h>
#else /* CONFIG_VIRTUALIZATION && ! CONFIG_KVM_GUEST_KERNEL */
/* it is native host kernel with virtualization support */
/* or it is paravirtualized host and guest kernel */
#endif /* ! CONFIG_KVM_GUEST_KERNEL */
#endif /* ! _E2K_KVM_COPY_HW_STACKS_H */

View File

@ -11,6 +11,8 @@
#include <linux/types.h>
#include <asm/kvm/vcpu-regs-debug-inline.h>
extern bool kvm_debug;
/*
* Some definitions to print/dump/show stacks
*/

View File

@ -86,6 +86,7 @@ kvm_mmu_set_init_gmm_root(struct kvm_vcpu *vcpu, hpa_t root)
}
gmm->u_pptb = vcpu->arch.mmu.get_vcpu_u_pptb(vcpu);
gmm->os_pptb = vcpu->arch.mmu.get_vcpu_os_pptb(vcpu);
gmm->u_vptb = vcpu->arch.mmu.get_vcpu_u_vptb(vcpu);
}
static inline pgd_t *
kvm_mmu_get_gmm_root(struct gmm_struct *gmm)
@ -208,15 +209,12 @@ switch_guest_pgd(pgd_t *next_pgd)
pgd_to_set = next_pgd;
}
KVM_BUG_ON(PCSHTP_SIGN_EXTEND(NATIVE_READ_PCSHTP_REG_SVALUE()) != 0);
reload_root_pgd(pgd_to_set);
/* FIXME: support of guest secondary space is not yet implemented
reload_secondary_page_dir(mm);
*/
/* any function call can fill old state of hardware stacks */
/* so after all calls do flush stacks again */
NATIVE_FLUSHCPU;
E2K_WAIT(_all_e);
}
#define DO_NOT_USE_ACTIVE_GMM /* turn OFF optimization */

View File

@ -0,0 +1,568 @@
/*
* KVM guest kernel processes support
* Copyright 2011 Salavat S. Guiliazov (atic@mcst.ru)
*/
#ifndef _E2K_KVM_GUEST_COPY_HW_STACKS_H
#define _E2K_KVM_GUEST_COPY_HW_STACKS_H
#include <asm/kvm/hypercall.h>
#include <asm/cpu_regs_types.h>
#include <asm/stacks.h>
#include <asm/kvm/guest/trace-hw-stacks.h>
extern bool debug_ustacks;
/*
 * Guest user stacks debugging.
 * DebugUST() prints the message with pr_info(), prefixed by the name of the
 * calling function, only while the debug_ustacks flag is set.
 */
#undef DEBUG_USER_STACKS_MODE
#undef DebugUST
#define DEBUG_USER_STACKS_MODE 0 /* guest user stacks debug mode */
#define DebugUST(fmt, args...) \
({ \
if (debug_ustacks) \
pr_info("%s(): " fmt, __func__, ##args); \
})
/*
 * Copy hardware stack frames of the given @size from @src to @dst with
 * fast_tagged_memory_copy(). Both the store and the load record opcodes
 * carry MAS_BYPASS_L1_CACHE in their MAS field.
 */
static inline void
kvm_kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
	fast_tagged_memory_copy(dst, src, size,
		TAGGED_MEM_STORE_REC_OPC |
			(MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT),
		TAGGED_MEM_LOAD_REC_OPC |
			(MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT),
		true);
}
/*
 * Collapse the kernel procedure stack: flush the stacks to memory, move the
 * part that remains after @spilled_size (current index minus @spilled_size)
 * from @src to @dst and ask the hypervisor to set PSP_hi.ind to that size.
 */
static __always_inline void
kvm_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
	e2k_psp_hi_t new_psp_hi;
	u64 cur_ind, cur_size;
	u64 rest;

	DebugUST("current host procedure stack index 0x%x, PSHTP 0x%x\n",
		NATIVE_NV_READ_PSP_HI_REG().PSP_hi_ind,
		NATIVE_NV_READ_PSHTP_REG().PSHTP_ind);

	KVM_COPY_STACKS_TO_MEMORY();
	ATOMIC_GET_HW_PS_SIZES(cur_ind, cur_size);

	/* what is left in the kernel stack must be non-negative and aligned */
	rest = cur_ind - spilled_size;
	BUG_ON((s64) rest < 0 || !IS_ALIGNED(rest, ALIGN_PSTACK_TOP_SIZE));

	kvm_kernel_hw_stack_frames_copy(dst, src, rest);

	new_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
	new_psp_hi.PSP_hi_ind = rest;
	HYPERVISOR_update_psp_hi(new_psp_hi.PSP_hi_half);

	DebugUST("move spilled procedure part from host top %px to "
		"bottom %px, size 0x%llx\n",
		src, dst, rest);
	DebugUST("host kernel procedure stack index is now 0x%x, "
		"guest user PSHTP 0x%llx\n",
		new_psp_hi.PSP_hi_ind, spilled_size);
}
/*
 * Collapse the kernel chain stack: flush the stacks to memory, move the
 * part that remains after @spilled_size (current index minus @spilled_size)
 * from @src to @dst and ask the hypervisor to set PCSP_hi.ind to that size.
 */
static __always_inline void
kvm_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
	e2k_pcsp_hi_t new_pcsp_hi;
	u64 cur_ind, cur_size;
	u64 rest;

	DebugUST("current host chain stack index 0x%x, PCSHTP 0x%llx\n",
		NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
		NATIVE_READ_PCSHTP_REG_SVALUE());

	KVM_COPY_STACKS_TO_MEMORY();
	ATOMIC_GET_HW_PCS_SIZES(cur_ind, cur_size);

	/* what is left in the kernel stack must be non-negative and aligned */
	rest = cur_ind - spilled_size;
	BUG_ON((s64) rest < 0 || !IS_ALIGNED(rest, ALIGN_PCSTACK_TOP_SIZE));

	kvm_kernel_hw_stack_frames_copy(dst, src, rest);

	new_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
	new_pcsp_hi.PCSP_hi_ind = rest;
	HYPERVISOR_update_pcsp_hi(new_pcsp_hi.PCSP_hi_half);

	DebugUST("move spilled chain part from host top %px to "
		"bottom %px, size 0x%llx\n",
		src, dst, rest);
	DebugUST("host kernel chain stack index is now 0x%x, "
		"guest user PCSHTP 0x%llx\n",
		new_pcsp_hi.PCSP_hi_ind, spilled_size);
}
/*
 * Copy @to_copy of hardware stack frames from @src to @dst through the
 * HYPERVISOR_copy_hw_stacks_frames() hypercall and return its result.
 * @is_chain selects chain (true) or procedure (false) stack frames.
 */
static __always_inline int
copy_stack_page_from_kernel(void __user *dst, void *src, e2k_size_t to_copy,
		bool is_chain)
{
	return HYPERVISOR_copy_hw_stacks_frames(dst, src, to_copy, is_chain);
}
/*
 * copy_stack_page_to_user - copy hardware stack frames into one user page
 * @dst: user address to copy to; only the single page containing @dst is
 *	pinned, so the caller is expected to keep the copied range inside it
 * @src: kernel address of the frames to copy
 * @to_copy: size to copy, nothing is done if it is 0
 * @is_chain: true for chain stack frames, false for procedure stack frames
 *
 * The user page is pinned for writing, addressed through page_address() and
 * filled by copy_stack_page_from_kernel(); the page reference is dropped
 * afterwards in any case.
 *
 * Returns 0 on success, -EFAULT if the user page could not be pinned,
 * or the error returned by copy_stack_page_from_kernel().
 */
static __always_inline int
copy_stack_page_to_user(void __user *dst, void *src, e2k_size_t to_copy,
		bool is_chain)
{
	struct page *page = NULL;
	unsigned long addr = (unsigned long)dst;
	void *k_dst;
	e2k_size_t offset;
	mm_segment_t seg;
	unsigned long ts_flag;
	int npages;
	int ret;

	if (to_copy == 0)
		return 0;

	DebugUST("started to copy %s stack from kernel stack %px to user %px "
		"size 0x%lx\n",
		(is_chain) ? "chain" : "procedure",
		src, dst, to_copy);

	seg = get_fs();
	set_fs(K_USER_DS);
	ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);

	/* try the fast variant first, fall back to the unlocked one */
	npages = __get_user_pages_fast(addr, 1, 1, &page);
	if (npages != 1)
		npages = get_user_pages_unlocked(addr, 1, &page, FOLL_WRITE);

	/* restore the thread state before looking at the result */
	clear_ts_flag(ts_flag);
	set_fs(seg);

	if (npages != 1)
		return -EFAULT;

	/* kernel address of the same byte inside the pinned page */
	offset = addr & ~PAGE_MASK;
	k_dst = page_address(page) + offset;
	DebugUST("copy stack frames from kernel %px to user %px, size 0x%lx\n",
		src, k_dst, to_copy);

	ret = copy_stack_page_from_kernel(k_dst, src, to_copy, is_chain);
	if (ret != 0) {
		/* arguments follow the "to user ... from kernel ..." order */
		pr_err("%s(): copy %s stack to user %px from kernel %px, "
			"size 0x%lx failed, error %d\n",
			__func__, (is_chain) ? "chain" : "procedure",
			k_dst, src, to_copy, ret);
	}

	put_page(page);
	return ret;
}
/*
 * Copy @to_copy of hardware stack frames from kernel address @src to user
 * address @dst, one page-bounded piece at a time, using
 * copy_stack_page_to_user(). @is_chain selects chain or procedure stack.
 * After a successful copy the source and destination frames are passed to
 * the corresponding guest trace points, if those are enabled.
 *
 * Returns 0 on success or the error of the failed piece.
 */
static __always_inline int
kvm_copy_user_stack_from_kernel(void __user *dst, void *src,
		e2k_size_t to_copy, bool is_chain)
{
	e2k_size_t page_off, chunk, done = 0;
	int ret;

	if (to_copy == 0)
		return 0;

	DebugUST("started to copy %s stack from kernel stack %px to user %px "
		"size 0x%lx\n",
		(is_chain) ? "chain" : "procedure",
		src, dst, to_copy);

	if (trace_guest_copy_hw_stack_enabled())
		trace_guest_copy_hw_stack(dst, src, to_copy, is_chain);

	do {
		/* never let one piece cross the end of the destination page */
		page_off = (unsigned long)dst & ~PAGE_MASK;
		chunk = min(to_copy, PAGE_SIZE - page_off);

		ret = copy_stack_page_to_user(dst, src, chunk, is_chain);
		if (ret != 0) {
			pr_err("%s(): failed, error %d\n", __func__, ret);
			return ret;
		}

		dst += chunk;
		src += chunk;
		to_copy -= chunk;
		done += chunk;
	} while (to_copy > 0);

	/* dst and src now point past the copied data; step back by done */
	if (is_chain) {
		if (trace_guest_chain_stack_frame_enabled()) {
			if (trace_guest_va_tlb_state_enabled()) {
				trace_guest_va_tlb_state((e2k_addr_t)dst);
			}
			trace_chain_stack_frames((e2k_mem_crs_t *)(src - done),
				(e2k_mem_crs_t *)(src - done), done,
				trace_guest_chain_stack_frame);
			trace_chain_stack_frames((e2k_mem_crs_t *)(dst - done),
				(e2k_mem_crs_t *)(dst - done), done,
				trace_guest_chain_stack_frame);
		}
	} else {
		if (trace_guest_proc_stack_frame_enabled()) {
			if (trace_guest_va_tlb_state_enabled()) {
				trace_guest_va_tlb_state((e2k_addr_t)dst);
			}
			trace_proc_stack_frames((kernel_mem_ps_t *)(src - done),
				(kernel_mem_ps_t *)(src - done), done,
				trace_guest_proc_stack_frame);
			trace_proc_stack_frames((kernel_mem_ps_t *)(dst - done),
				(kernel_mem_ps_t *)(dst - done), done,
				trace_guest_proc_stack_frame);
		}
	}

	return 0;
}
/*
 * kvm_user_hw_stacks_copy - copy the user part of hardware stacks, which is
 * kept in the kernel stacks, back to the user stacks
 * @regs: pt_regs whose ->stacks describe the user stacks and whose ->copyed
 *	records the sizes that have been already copied
 *
 * Nothing is done if regs->copyed already holds a non-zero procedure or
 * chain stack size. Otherwise the kernel stacks are flushed to memory by
 * hypercall and then, for the procedure and for the chain stack:
 *  - the user stack is expanded if its index has reached its size;
 *  - the part of PSHTP/PCSHTP size is copied from the base of the kernel
 *    stack to the user stack, right below the current user stack index;
 *  - the copied size is stored in regs->copyed.
 *
 * Returns 0 on success or the error of the failed step.
 */
static __always_inline int
kvm_user_hw_stacks_copy(pt_regs_t *regs)
{
	e2k_psp_lo_t psp_lo;
	e2k_psp_hi_t psp_hi;
	e2k_pshtp_t pshtp;
	e2k_pcsp_lo_t pcsp_lo;
	e2k_pcsp_hi_t pcsp_hi;
	e2k_pcshtp_t pcshtp;
	e2k_stacks_t *stacks;
	void __user *dst;
	void *src;
	long copyed_ps_size, copyed_pcs_size, to_copy, from, there_are;
	int ret;

	if (unlikely(irqs_disabled())) {
		/* the first value printed is PSR, so it is labelled as PSR */
		pr_err("%s() called with IRQs disabled PSR: 0x%lx UPSR: 0x%lx "
			"under UPSR %d\n",
			__func__, KVM_READ_PSR_REG_VALUE(),
			KVM_READ_UPSR_REG_VALUE(),
			kvm_get_vcpu_state()->irqs_under_upsr);
		local_irq_enable();
		WARN_ON(true);
	}

	stacks = &regs->stacks;
	copyed_ps_size = regs->copyed.ps_size;
	copyed_pcs_size = regs->copyed.pcs_size;
	if (unlikely(copyed_ps_size || copyed_pcs_size)) {
		/* stacks have been already copyed */
		BUG_ON(copyed_ps_size != GET_PSHTP_MEM_INDEX(stacks->pshtp) &&
			GET_PSHTP_MEM_INDEX(stacks->pshtp) != 0);
		BUG_ON(copyed_pcs_size != PCSHTP_SIGN_EXTEND(stacks->pcshtp) &&
			PCSHTP_SIGN_EXTEND(stacks->pcshtp) != SZ_OF_CR);
		return 0;
	}

	ret = HYPERVISOR_copy_stacks_to_memory();
	if (ret != 0) {
		pr_err("%s(): flush of kernel stacks failed, error %d\n",
			__func__, ret);
		goto failed;
	}

	/* copy user part of procedure stack from kernel back to user */
	ATOMIC_READ_HW_STACKS_REGS(psp_lo.PSP_lo_half, psp_hi.PSP_hi_half,
				pshtp.PSHTP_reg,
				pcsp_lo.PCSP_lo_half, pcsp_hi.PCSP_hi_half,
				pcshtp);
	src = (void *)psp_lo.PSP_lo_base;
	DebugUST("procedure stack at kernel from %px, size 0x%x, ind 0x%x, "
		"pshtp 0x%llx\n",
		src, psp_hi.PSP_hi_size, psp_hi.PSP_hi_ind, pshtp.PSHTP_reg);
	BUG_ON(psp_hi.PSP_hi_ind > psp_hi.PSP_hi_size);

	if (stacks->psp_hi.PSP_hi_ind >= stacks->psp_hi.PSP_hi_size) {
		/* procedure stack overflow, need expand */
		ret = handle_proc_stack_bounds(stacks, regs->trap);
		if (unlikely(ret)) {
			pr_err("%s(): could not handle process %s (%d) "
				"procedure stack overflow, error %d\n",
				__func__, current->comm, current->pid, ret);
			goto failed;
		}
	}
	to_copy = GET_PSHTP_MEM_INDEX(stacks->pshtp);
	BUG_ON(to_copy < 0);
	/* the copied part ends at the current user stack index */
	from = stacks->psp_hi.PSP_hi_ind - to_copy;
	BUG_ON(from < 0);
	dst = (void __user *)stacks->psp_lo.PSP_lo_base + from;
	DebugUST("procedure stack at user from %px, ind 0x%x, "
		"pshtp size to copy 0x%lx\n",
		dst, stacks->psp_hi.PSP_hi_ind, to_copy);
	there_are = stacks->psp_hi.PSP_hi_size - from;
	if (there_are < to_copy) {
		pr_err("%s(): user procedure stack overflow, there are 0x%lx "
			"to copy need 0x%lx, not yet implemented\n",
			__func__, there_are, to_copy);
		BUG_ON(true);
	}
	if (to_copy > 0) {
		ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, false);
		if (ret != 0) {
			pr_err("%s(): procedure stack copying from kernel %px "
				"to user %px, size 0x%lx failed, error %d\n",
				__func__, src, dst, to_copy, ret);
			goto failed;
		}
		regs->copyed.ps_size = to_copy;
	}

	/* copy user part of chain stack from kernel back to user */
	src = (void *)pcsp_lo.PCSP_lo_base;
	DebugUST("chain stack at kernel from %px, size 0x%x, ind 0x%x, "
		"pcshtp 0x%x\n",
		src, pcsp_hi.PCSP_hi_size, pcsp_hi.PCSP_hi_ind, pcshtp);
	BUG_ON(pcsp_hi.PCSP_hi_ind + PCSHTP_SIGN_EXTEND(pcshtp) >
						pcsp_hi.PCSP_hi_size);
	if (stacks->pcsp_hi.PCSP_hi_ind >= stacks->pcsp_hi.PCSP_hi_size) {
		/* chain stack overflow, need expand */
		ret = handle_chain_stack_bounds(stacks, regs->trap);
		if (unlikely(ret)) {
			pr_err("%s(): could not handle process %s (%d) "
				"chain stack overflow, error %d\n",
				__func__, current->comm, current->pid, ret);
			goto failed;
		}
	}
	to_copy = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
	BUG_ON(to_copy < 0);
	/* the copied part ends at the current user stack index */
	from = stacks->pcsp_hi.PCSP_hi_ind - to_copy;
	BUG_ON(from < 0);
	/* user address: annotated like the procedure stack destination */
	dst = (void __user *)stacks->pcsp_lo.PCSP_lo_base + from;
	BUG_ON(to_copy > pcsp_hi.PCSP_hi_ind + PCSHTP_SIGN_EXTEND(pcshtp));
	DebugUST("chain stack at user from %px, ind 0x%x, "
		"pcshtp size to copy 0x%lx\n",
		dst, stacks->pcsp_hi.PCSP_hi_ind, to_copy);
	there_are = stacks->pcsp_hi.PCSP_hi_size - from;
	if (there_are < to_copy) {
		pr_err("%s(): user chain stack overflow, there are 0x%lx "
			"to copy need 0x%lx, not yet implemented\n",
			__func__, there_are, to_copy);
		BUG_ON(true);
	}
	if (to_copy > 0) {
		ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, true);
		if (ret != 0) {
			pr_err("%s(): chain stack copying from kernel %px "
				"to user %px, size 0x%lx failed, error %d\n",
				__func__, src, dst, to_copy, ret);
			goto failed;
		}
		regs->copyed.pcs_size = to_copy;
	}

	/* the success path also ends here, with ret equal to 0 */
failed:
	if (DEBUG_USER_STACKS_MODE)
		debug_ustacks = false;
	return ret;
}
/*
 * Copy additional frames injected to the guest kernel stack, but these frames
 * are for guest user stack and should be copyed from kernel back to the top
 * of user.
 *
 * @regs: pt_regs whose ->stacks describe the user chain stack and whose
 *	->copyed records what has been already copied
 * @frames_num: number of injected chain frames (SZ_OF_CR each) to copy
 *
 * The frames are taken from the kernel chain stack right after the already
 * copied user part (regs->copyed.pcs_size) and are written at the current
 * index of the user chain stack, which is then advanced by the copied size.
 * Must be called with IRQs enabled.
 *
 * Returns 0 on success (also when the frames have been copied before) or
 * the error of the failed step.
 */
static __always_inline int
kvm_copy_injected_pcs_frames_to_user(pt_regs_t *regs, int frames_num)
{
	e2k_size_t pcs_ind, pcs_size;
	e2k_addr_t pcs_base;
	int pcsh_top;
	e2k_stacks_t *stacks;
	void __user *dst;
	void *src;
	long copyed_frames_size, to_copy, from, there_are, frames_size;
	int ret;

	BUG_ON(irqs_disabled());

	frames_size = frames_num * SZ_OF_CR;
	copyed_frames_size = regs->copyed.pcs_injected_frames_size;
	if (unlikely(copyed_frames_size >= frames_size)) {
		/* all frames have been already copyed */
		return 0;
	}
	/* copyed only part of frames - not implemented case */
	BUG_ON(copyed_frames_size != 0);

	stacks = &regs->stacks;
	ATOMIC_GET_HW_PCS_SIZES_BASE_TOP(pcs_ind, pcs_size, pcs_base, pcsh_top);

	/* guest user stacks part spilled to kernel should be already copyed */
	/* compare with the sign-extended PCSHTP, not the extended comparison */
	BUG_ON(regs->copyed.pcs_size != PCSHTP_SIGN_EXTEND(stacks->pcshtp));

	/* injected frames follow the already copied user part */
	src = (void *)(pcs_base + regs->copyed.pcs_size);
	DebugUST("chain stack at kernel from %px, size 0x%lx + 0x%lx, "
		"ind 0x%lx, pcsh top 0x%x\n",
		src, pcs_size, frames_size, pcs_ind, pcsh_top);
	BUG_ON(regs->copyed.pcs_size + frames_size > pcs_ind + pcsh_top);
	if (stacks->pcsp_hi.PCSP_hi_ind + frames_size >
					stacks->pcsp_hi.PCSP_hi_size) {
		/* user chain stack can overflow, need expand */
		ret = handle_chain_stack_bounds(stacks, regs->trap);
		if (unlikely(ret)) {
			pr_err("%s(): could not handle process %s (%d) "
				"chain stack overflow, error %d\n",
				__func__, current->comm, current->pid, ret);
			goto failed;
		}
	}
	to_copy = frames_size;
	BUG_ON(to_copy < 0);
	from = stacks->pcsp_hi.PCSP_hi_ind;
	BUG_ON(from < regs->copyed.pcs_size);
	dst = (void __user *)stacks->pcsp_lo.PCSP_lo_base + from;
	DebugUST("chain stack at user from %px, ind 0x%x, "
		"frames size to copy 0x%lx\n",
		dst, stacks->pcsp_hi.PCSP_hi_ind, to_copy);
	there_are = stacks->pcsp_hi.PCSP_hi_size - from;
	if (there_are < to_copy) {
		pr_err("%s(): user chain stack overflow, there are 0x%lx "
			"to copy need 0x%lx, not yet implemented\n",
			__func__, there_are, to_copy);
		BUG_ON(true);
	}
	if (likely(to_copy > 0)) {
		ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, true);
		if (ret != 0) {
			pr_err("%s(): chain stack copying from kernel %px "
				"to user %px, size 0x%lx failed, error %d\n",
				__func__, src, dst, to_copy, ret);
			goto failed;
		}
		regs->copyed.pcs_injected_frames_size = to_copy;
		/* increment chain stack pointer */
		stacks->pcsp_hi.PCSP_hi_ind += to_copy;
	} else {
		BUG_ON(true);
		ret = 0;
	}

	/* the success path also ends here, with ret equal to 0 */
failed:
	if (DEBUG_USER_STACKS_MODE)
		debug_ustacks = false;
	return ret;
}
/**
 * kvm_user_hw_stacks_prepare - prepare user hardware stacks that have been
 *				SPILLed to kernel back to user space
 * @stacks - saved user stack registers
 * @regs - saved registers of the trap/system call, must be in user mode
 * @cur_window_q - size of current window in procedure stack,
 *		   not used by this guest implementation
 * @from - caller kind, must not contain FROM_PV_VCPU_MODE
 * @syscall - true if called upon direct system call exit (no signal handlers)
 *
 * Background (same as for the native kernel):
 *
 * 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
 *   - user signal handler had pcshtp==0x20 before return to sigreturn()
 *   - user context had pcshtp==0x20 before return to makecontext_trampoline()
 *   - chain stack underflow happened
 * So it is possible in sigreturn() and traps, but not in system calls.
 * This guest implementation only reports that case through DebugUST().
 *
 * 2) It is not possible to always FILL all of user data that have been
 * SPILLed to kernel stacks, so the user part is copied manually by
 * kvm_user_hw_stacks_copy().
 * This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
 * is signals: if a signal arrives after copying then it must see a coherent
 * state where saved stacks->pshtp and stacks->pcshtp values show how much
 * data from user space has been spilled to kernel space.
 *
 * Returns 0 on success; if the copy fails the current task is terminated
 * with do_exit(SIGKILL).
 */
static __always_inline int kvm_user_hw_stacks_prepare(
		struct e2k_stacks *stacks, pt_regs_t *regs,
		u64 cur_window_q, enum restore_caller from, int syscall)
{
	e2k_pcshtp_t u_pcshtp = stacks->pcshtp;
	int ret;

	BUG_ON(!kvm_trap_user_mode(regs));

	BUG_ON(from & FROM_PV_VCPU_MODE);

	/*
	 * 1) Make sure there is free space in kernel chain stack to return to
	 *    (only reported here)
	 */
	if (!syscall && u_pcshtp == 0) {
		DebugUST("%s(): PCSHTP is empty\n", __func__);
	}

	/*
	 * 2) Copy the user part of hardware stacks from kernel back to user
	 */
	ret = kvm_user_hw_stacks_copy(regs);
	if (ret != 0) {
		pr_err("%s(): copying of hardware stacks failed, error %d\n",
			__func__, ret);
		do_exit(SIGKILL);
	}
	return ret;
}
/*
 * On return from fork the user hardware stacks are prepared by the common
 * copy routine; its result is returned unchanged.
 */
static inline int
kvm_ret_from_fork_prepare_hv_stacks(struct pt_regs *regs)
{
	int ret;

	ret = kvm_user_hw_stacks_copy(regs);

	return ret;
}
#ifdef CONFIG_KVM_GUEST_KERNEL
/* native guest kernel */
/*
 * Native guest kernel: the generic name maps directly onto the KVM guest
 * implementation kvm_kernel_hw_stack_frames_copy().
 */
static __always_inline void
kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
kvm_kernel_hw_stack_frames_copy(dst, src, size);
}
/*
 * Native guest kernel: the generic name maps directly onto the KVM guest
 * implementation kvm_collapse_kernel_ps().
 */
static __always_inline void
collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
kvm_collapse_kernel_ps(dst, src, spilled_size);
}
/*
 * Native guest kernel: the generic name maps directly onto the KVM guest
 * implementation kvm_collapse_kernel_pcs().
 */
static __always_inline void
collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
kvm_collapse_kernel_pcs(dst, src, spilled_size);
}
/*
 * Native guest kernel: only @regs is used, the other arguments of the
 * generic interface are ignored by the KVM guest implementation.
 */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
		pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
	int ret;

	ret = kvm_user_hw_stacks_copy(regs);

	return ret;
}
/*
 * Native guest kernel: prepare user hardware stacks through the KVM guest
 * implementation, except for the e2k_longjmp2 system call.
 */
static __always_inline void host_user_hw_stacks_prepare(
		struct e2k_stacks *stacks, pt_regs_t *regs,
		u64 cur_window_q, enum restore_caller from, int syscall)
{
	/* for e2k_longjmp2 hardware stacks already are prepared */
	if (regs->sys_num != __NR_e2k_longjmp2) {
		kvm_user_hw_stacks_prepare(stacks, regs, cur_window_q,
						from, syscall);
	}
}
/*
 * Native guest kernel: the generic name maps onto the KVM guest
 * implementation and returns its result unchanged.
 */
static inline int
ret_from_fork_prepare_hv_stacks(struct pt_regs *regs)
{
	int ret;

	ret = kvm_ret_from_fork_prepare_hv_stacks(regs);

	return ret;
}
#endif /* CONFIG_KVM_GUEST_KERNEL */
#endif /* !(_E2K_KVM_GUEST_COPY_HW_STACKS_H) */

View File

@ -11,25 +11,6 @@
#include <asm/stacks.h>
#include <asm/ptrace.h>
/*
 * DebugKVMUS() prints the message with pr_info(), prefixed by the name of
 * the calling function, only if DEBUG_USER_STACKS_MODE is non-zero
 * (it is defined as 0 here).
 */
#undef DEBUG_USER_STACKS_MODE
#undef DebugKVMUS
#define DEBUG_USER_STACKS_MODE 0
#define DebugKVMUS(fmt, args...) \
({ \
if (DEBUG_USER_STACKS_MODE) \
pr_info("%s(): " fmt, __func__, ##args); \
})
extern bool debug_ustacks;
/*
 * Guest user stacks debugging.
 * DebugUST() prints the message with pr_info(), prefixed by the name of the
 * calling function, only while the debug_ustacks flag is set.
 */
#undef DEBUG_USER_STACKS_MODE
#undef DebugUST
#define DEBUG_USER_STACKS_MODE 0 /* guest user stacks debug mode */
#define DebugUST(fmt, args...) \
({ \
if (debug_ustacks) \
pr_info("%s(): " fmt, __func__, ##args); \
})
/* real flush of hardware stacks should be done by host hypercall */
/* so here nothing to do */
#ifdef CONFIG_KVM_GUEST_HW_PV
@ -129,474 +110,8 @@ kvm_preserve_user_hw_stacks_to_copy(e2k_stacks_t *u_stacks,
/* after copying and therefore are not preserve */
}
/*
 * Copy hardware stack frames of the given @size from @src to @dst with
 * fast_tagged_memory_copy(). Both the store and the load record opcodes
 * carry MAS_BYPASS_L1_CACHE in their MAS field.
 */
static inline void
kvm_kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
	fast_tagged_memory_copy(dst, src, size,
		TAGGED_MEM_STORE_REC_OPC |
			(MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT),
		TAGGED_MEM_LOAD_REC_OPC |
			(MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT),
		true);
}
/*
 * Collapse the kernel procedure stack: flush the stacks to memory, move the
 * part that remains after @spilled_size (current index minus @spilled_size)
 * from @src to @dst and ask the hypervisor to set PSP_hi.ind to that size.
 */
static __always_inline void
kvm_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
	e2k_psp_hi_t new_psp_hi;
	u64 cur_ind, cur_size;
	u64 rest;

	DebugUST("current host procedure stack index 0x%x, PSHTP 0x%x\n",
		NATIVE_NV_READ_PSP_HI_REG().PSP_hi_ind,
		NATIVE_NV_READ_PSHTP_REG().PSHTP_ind);

	KVM_COPY_STACKS_TO_MEMORY();
	ATOMIC_GET_HW_PS_SIZES(cur_ind, cur_size);

	/* what is left in the kernel stack must be non-negative and aligned */
	rest = cur_ind - spilled_size;
	BUG_ON((s64) rest < 0 || !IS_ALIGNED(rest, ALIGN_PSTACK_TOP_SIZE));

	kvm_kernel_hw_stack_frames_copy(dst, src, rest);

	new_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
	new_psp_hi.PSP_hi_ind = rest;
	HYPERVISOR_update_psp_hi(new_psp_hi.PSP_hi_half);

	DebugUST("move spilled procedure part from host top %px to "
		"bottom %px, size 0x%llx\n",
		src, dst, rest);
	DebugUST("host kernel procedure stack index is now 0x%x, "
		"guest user PSHTP 0x%llx\n",
		new_psp_hi.PSP_hi_ind, spilled_size);
}
/*
 * Collapse the kernel chain stack: flush the stacks to memory, move the
 * part that remains after @spilled_size (current index minus @spilled_size)
 * from @src to @dst and ask the hypervisor to set PCSP_hi.ind to that size.
 */
static __always_inline void
kvm_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
	e2k_pcsp_hi_t new_pcsp_hi;
	u64 cur_ind, cur_size;
	u64 rest;

	DebugUST("current host chain stack index 0x%x, PCSHTP 0x%llx\n",
		NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
		NATIVE_READ_PCSHTP_REG_SVALUE());

	KVM_COPY_STACKS_TO_MEMORY();
	ATOMIC_GET_HW_PCS_SIZES(cur_ind, cur_size);

	/* what is left in the kernel stack must be non-negative and aligned */
	rest = cur_ind - spilled_size;
	BUG_ON((s64) rest < 0 || !IS_ALIGNED(rest, ALIGN_PCSTACK_TOP_SIZE));

	kvm_kernel_hw_stack_frames_copy(dst, src, rest);

	new_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
	new_pcsp_hi.PCSP_hi_ind = rest;
	HYPERVISOR_update_pcsp_hi(new_pcsp_hi.PCSP_hi_half);

	DebugUST("move spilled chain part from host top %px to "
		"bottom %px, size 0x%llx\n",
		src, dst, rest);
	DebugUST("host kernel chain stack index is now 0x%x, "
		"guest user PCSHTP 0x%llx\n",
		new_pcsp_hi.PCSP_hi_ind, spilled_size);
}
/*
 * Copy @to_copy of hardware stack frames from @src to @dst through the
 * HYPERVISOR_copy_hw_stacks_frames() hypercall and return its result.
 * @is_chain selects chain (true) or procedure (false) stack frames.
 */
static __always_inline int
copy_stack_page_from_kernel(void __user *dst, void *src, e2k_size_t to_copy,
		bool is_chain)
{
	return HYPERVISOR_copy_hw_stacks_frames(dst, src, to_copy, is_chain);
}
/*
 * copy_stack_page_to_user - copy hardware stack frames into one user page
 * @dst: user address to copy to; only the single page containing @dst is
 *	pinned, so the caller is expected to keep the copied range inside it
 * @src: kernel address of the frames to copy
 * @to_copy: size to copy, nothing is done if it is 0
 * @is_chain: true for chain stack frames, false for procedure stack frames
 *
 * The user page is pinned for writing, addressed through page_address() and
 * filled by copy_stack_page_from_kernel(); the page reference is dropped
 * afterwards in any case.
 *
 * Returns 0 on success, -EFAULT if the user page could not be pinned,
 * or the error returned by copy_stack_page_from_kernel().
 */
static __always_inline int
copy_stack_page_to_user(void __user *dst, void *src, e2k_size_t to_copy,
		bool is_chain)
{
	struct page *page = NULL;
	unsigned long addr = (unsigned long)dst;
	void *k_dst;
	e2k_size_t offset;
	mm_segment_t seg;
	unsigned long ts_flag;
	int npages;
	int ret;

	if (to_copy == 0)
		return 0;

	DebugUST("started to copy %s stack from kernel stack %px to user %px "
		"size 0x%lx\n",
		(is_chain) ? "chain" : "procedure",
		src, dst, to_copy);

	seg = get_fs();
	set_fs(K_USER_DS);
	ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);

	/* try the fast variant first, fall back to the unlocked one */
	npages = __get_user_pages_fast(addr, 1, 1, &page);
	if (npages != 1)
		npages = get_user_pages_unlocked(addr, 1, &page, FOLL_WRITE);

	/* restore the thread state before looking at the result */
	clear_ts_flag(ts_flag);
	set_fs(seg);

	if (npages != 1)
		return -EFAULT;

	/* kernel address of the same byte inside the pinned page */
	offset = addr & ~PAGE_MASK;
	k_dst = page_address(page) + offset;
	DebugUST("copy stack frames from kernel %px to user %px, size 0x%lx\n",
		src, k_dst, to_copy);

	ret = copy_stack_page_from_kernel(k_dst, src, to_copy, is_chain);
	if (ret != 0) {
		/* arguments follow the "to user ... from kernel ..." order */
		pr_err("%s(): copy %s stack to user %px from kernel %px, "
			"size 0x%lx failed, error %d\n",
			__func__, (is_chain) ? "chain" : "procedure",
			k_dst, src, to_copy, ret);
	}

	put_page(page);
	return ret;
}
/*
 * Copy @to_copy bytes of hardware stack from kernel @src to user @dst,
 * splitting the range into pieces that never cross a page boundary of
 * the destination; each piece is handed to copy_stack_page_to_user().
 *
 * Returns 0 on success or the first non-zero error of a piece copy.
 */
static __always_inline int
kvm_copy_user_stack_from_kernel(void __user *dst, void *src,
			e2k_size_t to_copy, bool is_chain)
{
	if (to_copy == 0)
		return 0;

	DebugUST("started to copy %s stack from kernel stack %px to user %px "
		"size 0x%lx\n",
		(is_chain) ? "chain" : "procedure",
		src, dst, to_copy);

	while (to_copy > 0) {
		/* bytes left up to the end of the current destination page */
		e2k_size_t in_page = (unsigned long)dst & ~PAGE_MASK;
		e2k_size_t chunk = min(to_copy, PAGE_SIZE - in_page);
		int err = copy_stack_page_to_user(dst, src, chunk, is_chain);

		if (err != 0)
			return err;

		dst += chunk;
		src += chunk;
		to_copy -= chunk;
	}

	return 0;
}
/*
 * Copy the user part of the procedure and chain hardware stacks, which
 * currently lives at the base of the kernel hardware stacks, back to the
 * user stacks described by @regs->stacks.
 *
 * The amount to copy is taken from the saved stacks->pshtp (procedure
 * stack) and stacks->pcshtp (chain stack); the sizes actually copied are
 * recorded in regs->copyed.ps_size / regs->copyed.pcs_size, and a non-zero
 * value there makes a repeated call return immediately.
 *
 * Returns 0 on success or a non-zero error from the hypercall, from the
 * stack bounds handlers, or from the copy itself.
 */
static __always_inline int
kvm_user_hw_stacks_copy(pt_regs_t *regs)
{
	e2k_psp_lo_t psp_lo;
	e2k_psp_hi_t psp_hi;
	e2k_pshtp_t pshtp;
	e2k_pcsp_lo_t pcsp_lo;
	e2k_pcsp_hi_t pcsp_hi;
	e2k_pcshtp_t pcshtp;
	e2k_stacks_t *stacks;
	void __user *dst;
	void *src;
	long copyed_ps_size, copyed_pcs_size, to_copy, from, there_are;
	int ret;

	if (unlikely(irqs_disabled())) {
		/* report, then enable IRQs and continue */
		pr_err("%s() called with IRQs disabled PSR: 0x%lx UPSR: 0x%lx "
			"under UPSR %d\n",
			__func__, KVM_READ_PSR_REG_VALUE(),
			KVM_READ_UPSR_REG_VALUE(),
			kvm_get_vcpu_state()->irqs_under_upsr);
		local_irq_enable();
		WARN_ON(true);
	}

	stacks = &regs->stacks;
	copyed_ps_size = regs->copyed.ps_size;
	copyed_pcs_size = regs->copyed.pcs_size;
	if (unlikely(copyed_ps_size || copyed_pcs_size)) {
		/* stacks have been already copyed */
		BUG_ON(copyed_ps_size != GET_PSHTP_MEM_INDEX(stacks->pshtp) &&
			GET_PSHTP_MEM_INDEX(stacks->pshtp) != 0);
		BUG_ON(copyed_pcs_size != PCSHTP_SIGN_EXTEND(stacks->pcshtp) &&
			PCSHTP_SIGN_EXTEND(stacks->pcshtp) != SZ_OF_CR);
		return 0;
	}

	ret = HYPERVISOR_copy_stacks_to_memory();
	if (ret != 0) {
		pr_err("%s(): flush of kernel stacks failed, error %d\n",
			__func__, ret);
		goto failed;
	}

	/* copy user part of procedure stack from kernel back to user */
	ATOMIC_READ_HW_STACKS_REGS(psp_lo.PSP_lo_half, psp_hi.PSP_hi_half,
				pshtp.PSHTP_reg,
				pcsp_lo.PCSP_lo_half, pcsp_hi.PCSP_hi_half,
				pcshtp);
	src = (void *)psp_lo.PSP_lo_base;
	DebugUST("procedure stack at kernel from %px, size 0x%x, ind 0x%x, "
		"pshtp 0x%llx\n",
		src, psp_hi.PSP_hi_size, psp_hi.PSP_hi_ind, pshtp.PSHTP_reg);
	BUG_ON(psp_hi.PSP_hi_ind > psp_hi.PSP_hi_size);

	if (stacks->psp_hi.PSP_hi_ind >= stacks->psp_hi.PSP_hi_size) {
		/* procedure stack overflow, need expand */
		ret = handle_proc_stack_bounds(stacks, regs->trap);
		if (unlikely(ret)) {
			pr_err("%s(): could not handle process %s (%d) "
				"procedure stack overflow, error %d\n",
				__func__, current->comm, current->pid, ret);
			goto failed;
		}
	}
	to_copy = GET_PSHTP_MEM_INDEX(stacks->pshtp);
	BUG_ON(to_copy < 0);
	/* the spilled part occupies the top @to_copy bytes below user ind */
	from = stacks->psp_hi.PSP_hi_ind - to_copy;
	BUG_ON(from < 0);
	dst = (void __user *)stacks->psp_lo.PSP_lo_base + from;
	DebugUST("procedure stack at user from %px, ind 0x%x, "
		"pshtp size to copy 0x%lx\n",
		dst, stacks->psp_hi.PSP_hi_ind, to_copy);
	there_are = stacks->psp_hi.PSP_hi_size - from;
	if (there_are < to_copy) {
		pr_err("%s(): user procedure stack overflow, there are 0x%lx "
			"to copy need 0x%lx, not yet implemented\n",
			__func__, there_are, to_copy);
		BUG_ON(true);
	}
	if (to_copy > 0) {
		ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, false);
		if (ret != 0) {
			pr_err("%s(): procedure stack copying from kernel %px "
				"to user %px, size 0x%lx failed, error %d\n",
				__func__, src, dst, to_copy, ret);
			goto failed;
		}
		regs->copyed.ps_size = to_copy;
	}

	/* copy user part of chain stack from kernel back to user */
	src = (void *)pcsp_lo.PCSP_lo_base;
	DebugUST("chain stack at kernel from %px, size 0x%x, ind 0x%x, "
		"pcshtp 0x%x\n",
		src, pcsp_hi.PCSP_hi_size, pcsp_hi.PCSP_hi_ind, pcshtp);
	BUG_ON(pcsp_hi.PCSP_hi_ind + PCSHTP_SIGN_EXTEND(pcshtp) >
						pcsp_hi.PCSP_hi_size);
	if (stacks->pcsp_hi.PCSP_hi_ind >= stacks->pcsp_hi.PCSP_hi_size) {
		/* chain stack overflow, need expand */
		ret = handle_chain_stack_bounds(stacks, regs->trap);
		if (unlikely(ret)) {
			pr_err("%s(): could not handle process %s (%d) "
				"chain stack overflow, error %d\n",
				__func__, current->comm, current->pid, ret);
			goto failed;
		}
	}
	to_copy = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
	BUG_ON(to_copy < 0);
	from = stacks->pcsp_hi.PCSP_hi_ind - to_copy;
	BUG_ON(from < 0);
	dst = (void __user *)stacks->pcsp_lo.PCSP_lo_base + from;
	BUG_ON(to_copy > pcsp_hi.PCSP_hi_ind + PCSHTP_SIGN_EXTEND(pcshtp));
	DebugUST("chain stack at user from %px, ind 0x%x, "
		"pcshtp size to copy 0x%lx\n",
		dst, stacks->pcsp_hi.PCSP_hi_ind, to_copy);
	there_are = stacks->pcsp_hi.PCSP_hi_size - from;
	if (there_are < to_copy) {
		pr_err("%s(): user chain stack overflow, there are 0x%lx "
			"to copy need 0x%lx, not yet implemented\n",
			__func__, there_are, to_copy);
		BUG_ON(true);
	}
	if (to_copy > 0) {
		ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, true);
		if (ret != 0) {
			pr_err("%s(): chain stack copying from kernel %px "
				"to user %px, size 0x%lx failed, error %d\n",
				__func__, src, dst, to_copy, ret);
			goto failed;
		}
		regs->copyed.pcs_size = to_copy;
	}

failed:
	/* reached on success too: ret is 0 unless a step above failed */
	if (DEBUG_USER_STACKS_MODE)
		debug_ustacks = false;
	return ret;
}
/*
 * Copy additional frames injected to the guest kernel chain stack, but
 * intended for the guest user chain stack, from kernel back to the top
 * of the user chain stack.
 *
 * @regs:	saved registers; regs->stacks describes the user stacks and
 *		regs->copyed tracks what has been copied so far
 * @frames_num:	number of injected chain frames (SZ_OF_CR bytes each)
 *
 * The frames are taken from the kernel chain stack right after the
 * already copied user part (regs->copyed.pcs_size).  On success the user
 * chain stack index is advanced by the copied size and the size is
 * recorded in regs->copyed.pcs_injected_frames_size, so a repeated call
 * returns immediately.
 *
 * Returns 0 on success or a non-zero error from the bounds handler or
 * from the copy.
 */
static __always_inline int
kvm_copy_injected_pcs_frames_to_user(pt_regs_t *regs, int frames_num)
{
	e2k_size_t pcs_ind, pcs_size;
	e2k_addr_t pcs_base;
	int pcsh_top;
	e2k_stacks_t *stacks;
	void __user *dst;
	void *src;
	long copyed_frames_size, to_copy, from, there_are, frames_size;
	int ret;

	BUG_ON(irqs_disabled());

	frames_size = frames_num * SZ_OF_CR;
	copyed_frames_size = regs->copyed.pcs_injected_frames_size;
	if (unlikely(copyed_frames_size >= frames_size)) {
		/* all frames have been already copyed */
		return 0;
	} else {
		/* copyed only part of frames - not implemented case */
		BUG_ON(copyed_frames_size != 0);
	}

	stacks = &regs->stacks;
	ATOMIC_GET_HW_PCS_SIZES_BASE_TOP(pcs_ind, pcs_size, pcs_base, pcsh_top);

	/* guest user stacks part spilled to kernel should be already copyed */
	BUG_ON(regs->copyed.pcs_size != PCSHTP_SIGN_EXTEND(stacks->pcshtp));

	src = (void *)(pcs_base + regs->copyed.pcs_size);
	DebugUST("chain stack at kernel from %px, size 0x%lx + 0x%lx, "
		"ind 0x%lx, pcsh top 0x%x\n",
		src, pcs_size, frames_size, pcs_ind, pcsh_top);
	BUG_ON(regs->copyed.pcs_size + frames_size > pcs_ind + pcsh_top);
	if (stacks->pcsp_hi.PCSP_hi_ind + frames_size >
					stacks->pcsp_hi.PCSP_hi_size) {
		/* user chain stack can overflow, need expand */
		ret = handle_chain_stack_bounds(stacks, regs->trap);
		if (unlikely(ret)) {
			pr_err("%s(): could not handle process %s (%d) "
				"chain stack overflow, error %d\n",
				__func__, current->comm, current->pid, ret);
			goto failed;
		}
	}
	to_copy = frames_size;
	BUG_ON(to_copy < 0);
	/* injected frames go on top of the current user chain stack */
	from = stacks->pcsp_hi.PCSP_hi_ind;
	BUG_ON(from < regs->copyed.pcs_size);
	dst = (void __user *)stacks->pcsp_lo.PCSP_lo_base + from;
	DebugUST("chain stack at user from %px, ind 0x%x, "
		"frames size to copy 0x%lx\n",
		dst, stacks->pcsp_hi.PCSP_hi_ind, to_copy);
	there_are = stacks->pcsp_hi.PCSP_hi_size - from;
	if (there_are < to_copy) {
		pr_err("%s(): user chain stack overflow, there are 0x%lx "
			"to copy need 0x%lx, not yet implemented\n",
			__func__, there_are, to_copy);
		BUG_ON(true);
	}
	if (likely(to_copy > 0)) {
		ret = kvm_copy_user_stack_from_kernel(dst, src, to_copy, true);
		if (ret != 0) {
			pr_err("%s(): chain stack copying from kernel %px "
				"to user %px, size 0x%lx failed, error %d\n",
				__func__, src, dst, to_copy, ret);
			goto failed;
		}
		regs->copyed.pcs_injected_frames_size = to_copy;
		/* increment chain stack pointer */
		stacks->pcsp_hi.PCSP_hi_ind += to_copy;
	} else {
		/* not expected: the early return above handles size <= 0 */
		BUG_ON(true);
		ret = 0;
	}

failed:
	if (DEBUG_USER_STACKS_MODE)
		debug_ustacks = false;
	return ret;
}
extern void kvm_get_mm_notifier(thread_info_t *ti, struct mm_struct *mm);
/**
 * kvm_user_hw_stacks_prepare - prepare user hardware stacks that have been
 *			     SPILLed to kernel back to user space
 * @stacks - saved user stack registers (only stacks->pcshtp is read here)
 * @regs - saved registers of the interrupted user context
 * @cur_window_q - size of current window in procedure stack
 *		   (not used by this implementation)
 * @from - who requested the restore; must not have FROM_PV_VCPU_MODE set
 * @syscall - true if called upon direct system call exit (no signal handlers)
 *
 * Background of the two steps:
 *
 * 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
 *   - user signal handler had pcshtp==0x20 before return to sigreturn()
 *   - user context had pcshtp==0x20 before return to makecontext_trampoline()
 *   - chain stack underflow happened
 * So it is possible in sigreturn() and traps, but not in system calls.
 * If the trick with return to FILL user hardware stacks is used then
 * there must be a frame in chain stack to return to.
 * This implementation only emits a debug message for that case.
 *
 * 2) It is not possible to always FILL all of user data that have been
 * SPILLed to kernel stacks. So the leftovers that can not be FILLed are
 * copied manually to user space by kvm_user_hw_stacks_copy().
 * This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
 * is signals: if a signal arrives after copying then it must see a coherent
 * state where saved stacks->pshtp and stacks->pcshtp values show how much
 * data from user space has been spilled to kernel space.
 *
 * Returns 0 on success.  If the copy fails the error is logged and the
 * current task is terminated with do_exit(SIGKILL).
 */
static __always_inline int kvm_user_hw_stacks_prepare(
		struct e2k_stacks *stacks, pt_regs_t *regs,
		u64 cur_window_q, enum restore_caller from, int syscall)
{
	e2k_pcshtp_t u_pcshtp = stacks->pcshtp;
	int ret;

	BUG_ON(!kvm_trap_user_mode(regs));

	BUG_ON(from & FROM_PV_VCPU_MODE);

	/*
	 * 1) Make sure there is free space in kernel chain stack to return to
	 */
	if (!syscall && u_pcshtp == 0) {
		DebugUST("%s(): PCSHTP is empty\n", __func__);
	}

	/*
	 * 2) Copy the user data that was SPILLed to kernel stacks
	 */
	ret = kvm_user_hw_stacks_copy(regs);
	if (ret != 0) {
		pr_err("%s(): copying of hardware stacks failed, error %d\n",
			__func__, ret);
		do_exit(SIGKILL);
	}
	return ret;
}
/*
 * Return-from-fork variant of the hardware stacks preparation:
 * simply performs kvm_user_hw_stacks_copy() for @regs and returns its result.
 */
static inline int
kvm_ret_from_fork_prepare_hv_stacks(struct pt_regs *regs)
{
return kvm_user_hw_stacks_copy(regs);
}
static __always_inline void
kvm_jump_to_ttable_entry(struct pt_regs *regs, enum restore_caller from)
{
@ -823,55 +338,12 @@ preserve_user_hw_stacks_to_copy(e2k_stacks_t *u_stacks,
kvm_preserve_user_hw_stacks_to_copy(u_stacks, cur_stacks);
}
/* Generic name for kvm_kernel_hw_stack_frames_copy(); arguments are forwarded as is. */
static __always_inline void
kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
kvm_kernel_hw_stack_frames_copy(dst, src, size);
}
/* Generic name for kvm_collapse_kernel_ps(); arguments are forwarded as is. */
static __always_inline void
collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
kvm_collapse_kernel_ps(dst, src, spilled_size);
}
/* Generic name for kvm_collapse_kernel_pcs(); arguments are forwarded as is. */
static __always_inline void
collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
kvm_collapse_kernel_pcs(dst, src, spilled_size);
}
/*
 * Generic name for kvm_user_hw_stacks_copy().
 * Only @regs is used; @stacks, @cur_window_q and @copy_full are ignored.
 */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
return kvm_user_hw_stacks_copy(regs);
}
/*
 * Prepare user hardware stacks via kvm_user_hw_stacks_prepare(), except
 * for the e2k_longjmp2 system call whose hardware stacks are already
 * prepared.  The result of the preparation is not propagated.
 */
static __always_inline void host_user_hw_stacks_prepare(
		struct e2k_stacks *stacks, pt_regs_t *regs,
		u64 cur_window_q, enum restore_caller from, int syscall)
{
	/* hardware stacks already are prepared for e2k_longjmp2 */
	if (regs->sys_num != __NR_e2k_longjmp2)
		kvm_user_hw_stacks_prepare(stacks, regs, cur_window_q,
						from, syscall);
}
/* Intentionally empty: this kernel flavour never acts as a host. */
static __always_inline void
host_exit_to_usermode_loop(struct pt_regs *regs, bool syscall, bool has_signal)
{
/* native & guest kernels cannot be as host */
}
/* Generic name for kvm_ret_from_fork_prepare_hv_stacks(); returns its result. */
static inline int
ret_from_fork_prepare_hv_stacks(struct pt_regs *regs)
{
return kvm_ret_from_fork_prepare_hv_stacks(regs);
}
static __always_inline void
jump_to_ttable_entry(struct pt_regs *regs, enum restore_caller from)
{
@ -885,8 +357,6 @@ virt_cpu_thread_init(struct task_struct *boot_task)
KVM_GET_VCPU_STATE_BASE(vcpu_state_base);
task_thread_info(boot_task)->vcpu_state_base = vcpu_state_base;
if (!IS_HV_GM())
kvm_vcpu_boot_thread_init(boot_task);
}
static inline int

View File

@ -0,0 +1,41 @@
#ifndef _E2K_KVM_GUEST_TRACE_DEFS_H_
#define _E2K_KVM_GUEST_TRACE_DEFS_H_
#include <linux/types.h>
#include <asm/mmu_types.h>
#include <asm/pgtable_def.h>
#include <asm/kvm/hypercall.h>
/*
 * Ask the host for the translation of guest virtual @address at the
 * shadow page tables and store the result for tracing.
 *
 * *pt_level receives the level reported by the host; an output entry is
 * written only when *pt_level is not greater than that entry's level
 * number, the others are left untouched.  If the hypercall fails, *pgd is
 * set to -1 and *pt_level to E2K_PGD_LEVEL_NUM.
 */
static inline void
trace_kvm_get_gva_spt_translation(e2k_addr_t address,
	pgdval_t *pgd, pudval_t *pud, pmdval_t *pmd, pteval_t *pte, int *pt_level)
{
	mmu_spt_trans_t trans;
	int err = HYPERVISOR_get_spt_translation(address, &trans);

	if (unlikely(err != 0)) {
		pr_err("%s() : host could not get guest address 0x%lx "
			"translation at SPTs, error %d\n",
			__func__, address, err);
		*pgd = -1;
		*pt_level = E2K_PGD_LEVEL_NUM;
		return;
	}

	*pt_level = trans.pt_levels;

	if (*pt_level <= E2K_PGD_LEVEL_NUM)
		*pgd = trans.pgd;
	if (*pt_level <= E2K_PUD_LEVEL_NUM)
		*pud = trans.pud;
	if (*pt_level <= E2K_PMD_LEVEL_NUM)
		*pmd = trans.pmd;
	if (*pt_level <= E2K_PTE_LEVEL_NUM)
		*pte = trans.pte;
}
#endif /* _E2K_KVM_GUEST_TRACE_DEFS_H_ */

View File

@ -0,0 +1,308 @@
#if !defined(_KVM_GUEST_TRACE_COPY_HW_STACKS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _KVM_GUEST_TRACE_COPY_HW_STACKS_H
#include <linux/tracepoint.h>
#include <linux/hugetlb.h>
#include <asm/trace-defs.h>
#include <asm/trace_pgtable-v2.h>
#include <asm/trace_pgtable-v6.h>
#include <asm/pgtable_def.h>
#include <asm/kvm/guest/trace-defs.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM guest
/*
 * guest_copy_hw_stack - trace one copy of hardware stack data from guest
 * kernel (@src) to guest user (@dst).
 *
 * Besides the addresses, @size and @is_chain, the event records for both
 * addresses the translation obtained from current->mm
 * (trace_get_va_translation()) and the translation at the shadow page
 * tables (trace_kvm_get_gva_spt_translation()).
 * When printing, an entry of a level is shown only if the recorded
 * level is not greater than that level's number, otherwise -1UL is shown.
 */
TRACE_EVENT(
guest_copy_hw_stack,
TP_PROTO(void *dst, void *src, unsigned long size, bool is_chain),
TP_ARGS(dst, src, size, is_chain),
TP_STRUCT__entry(
__field( void *, dst )
__field( void *, src )
__field( u64, size )
__field( bool, is_chain )
/* translation of dst taken from current->mm */
__field( pgdval_t, dst_pgd )
__field( pudval_t, dst_pud )
__field( pmdval_t, dst_pmd )
__field( pteval_t, dst_pte )
__field( int, dst_pt_level )
/* translation of src taken from current->mm */
__field( pgdval_t, src_pgd )
__field( pudval_t, src_pud )
__field( pmdval_t, src_pmd )
__field( pteval_t, src_pte )
__field( int, src_pt_level )
/* translation of dst at the shadow page tables */
__field( pgdval_t, dst_spt_pgd )
__field( pudval_t, dst_spt_pud )
__field( pmdval_t, dst_spt_pmd )
__field( pteval_t, dst_spt_pte )
__field( int, dst_spt_level )
/* translation of src at the shadow page tables */
__field( pgdval_t, src_spt_pgd )
__field( pudval_t, src_spt_pud )
__field( pmdval_t, src_spt_pmd )
__field( pteval_t, src_spt_pte )
__field( int, src_spt_level )
),
TP_fast_assign(
__entry->dst = dst;
__entry->src = src;
__entry->size = size;
__entry->is_chain = is_chain;
trace_get_va_translation(current->mm, (e2k_addr_t)dst,
&__entry->dst_pgd, &__entry->dst_pud, &__entry->dst_pmd,
&__entry->dst_pte, &__entry->dst_pt_level);
trace_kvm_get_gva_spt_translation((e2k_addr_t)dst,
&__entry->dst_spt_pgd, &__entry->dst_spt_pud,
&__entry->dst_spt_pmd, &__entry->dst_spt_pte,
&__entry->dst_spt_level);
trace_get_va_translation(current->mm, (e2k_addr_t)src,
&__entry->src_pgd, &__entry->src_pud, &__entry->src_pmd,
&__entry->src_pte, &__entry->src_pt_level);
trace_kvm_get_gva_spt_translation((e2k_addr_t)src,
&__entry->src_spt_pgd, &__entry->src_spt_pud,
&__entry->src_spt_pmd, &__entry->src_spt_pte,
&__entry->src_spt_level);
),
TP_printk("copy %s stack guest user <- guest kernel: dst %px "
"src %px size %llx\n"
" user guest dst %px : pgd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pud 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pmd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pte 0x%016lx : %s\n"
" Access mode: %s%s\n"
" user guest dst spt %px : pgd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pud 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pmd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pte 0x%016lx : %s\n"
" Access mode: %s%s\n"
" kernel guest src %px : pgd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pud 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pmd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pte 0x%016lx : %s\n"
" Access mode: %s%s\n"
" kernel guest src spt %px : pgd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pud 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pmd 0x%016lx : %s\n"
" Access mode: %s%s\n"
" pte 0x%016lx : %s\n"
" Access mode: %s%s",
(__entry->is_chain) ? "chain" : "procedure",
__entry->dst,
__entry->src,
__entry->size,
/* dst, translation from current->mm */
__entry->dst,
(__entry->dst_pt_level <= E2K_PGD_LEVEL_NUM) ?
__entry->dst_pgd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pgd,
__entry->dst_pt_level <= E2K_PGD_LEVEL_NUM),
(__entry->dst_pt_level <= E2K_PUD_LEVEL_NUM) ?
__entry->dst_pud : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pud,
__entry->dst_pt_level <= E2K_PUD_LEVEL_NUM),
(__entry->dst_pt_level <= E2K_PMD_LEVEL_NUM) ?
__entry->dst_pmd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pmd,
__entry->dst_pt_level <= E2K_PMD_LEVEL_NUM),
(__entry->dst_pt_level <= E2K_PTE_LEVEL_NUM) ?
__entry->dst_pte : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pte,
__entry->dst_pt_level <= E2K_PTE_LEVEL_NUM),
/* dst, translation at shadow page tables */
__entry->dst,
(__entry->dst_spt_level <= E2K_PGD_LEVEL_NUM) ?
__entry->dst_spt_pgd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_spt_pgd,
__entry->dst_spt_level <= E2K_PGD_LEVEL_NUM),
(__entry->dst_spt_level <= E2K_PUD_LEVEL_NUM) ?
__entry->dst_spt_pud : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_spt_pud,
__entry->dst_spt_level <= E2K_PUD_LEVEL_NUM),
(__entry->dst_spt_level <= E2K_PMD_LEVEL_NUM) ?
__entry->dst_spt_pmd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_spt_pmd,
__entry->dst_spt_level <= E2K_PMD_LEVEL_NUM),
(__entry->dst_spt_level <= E2K_PTE_LEVEL_NUM) ?
__entry->dst_spt_pte : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_spt_pte,
__entry->dst_spt_level <= E2K_PTE_LEVEL_NUM),
/* src, translation from current->mm */
__entry->src,
(__entry->src_pt_level <= E2K_PGD_LEVEL_NUM) ?
__entry->src_pgd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pgd,
__entry->src_pt_level <= E2K_PGD_LEVEL_NUM),
(__entry->src_pt_level <= E2K_PUD_LEVEL_NUM) ?
__entry->src_pud : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pud,
__entry->src_pt_level <= E2K_PUD_LEVEL_NUM),
(__entry->src_pt_level <= E2K_PMD_LEVEL_NUM) ?
__entry->src_pmd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pmd,
__entry->src_pt_level <= E2K_PMD_LEVEL_NUM),
(__entry->src_pt_level <= E2K_PTE_LEVEL_NUM) ?
__entry->src_pte : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pte,
__entry->src_pt_level <= E2K_PTE_LEVEL_NUM),
/* src, translation at shadow page tables */
__entry->src,
(__entry->src_spt_level <= E2K_PGD_LEVEL_NUM) ?
__entry->src_spt_pgd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_spt_pgd,
__entry->src_spt_level <= E2K_PGD_LEVEL_NUM),
(__entry->src_spt_level <= E2K_PUD_LEVEL_NUM) ?
__entry->src_spt_pud : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_spt_pud,
__entry->src_spt_level <= E2K_PUD_LEVEL_NUM),
(__entry->src_spt_level <= E2K_PMD_LEVEL_NUM) ?
__entry->src_spt_pmd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_spt_pmd,
__entry->src_spt_level <= E2K_PMD_LEVEL_NUM),
(__entry->src_spt_level <= E2K_PTE_LEVEL_NUM) ?
__entry->src_spt_pte : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->src_spt_pte,
__entry->src_spt_level <= E2K_PTE_LEVEL_NUM)
)
);
/*
 * guest_proc_stack_frame - trace one procedure stack frame.
 * Records the frame address @ps_base, a copy of the frame contents
 * (*@ps_frame) and the result of HYPERVISOR_mmu_probe() for @ps_base
 * with KVM_MMU_PROBE_ENTRY.
 */
TRACE_EVENT(
guest_proc_stack_frame,
TP_PROTO(kernel_mem_ps_t *ps_base, kernel_mem_ps_t *ps_frame),
TP_ARGS(ps_base, ps_frame),
TP_STRUCT__entry(
__field( kernel_mem_ps_t *, ps_base )
__field_struct( kernel_mem_ps_t, ps_frame )
__field( pgprotval_t, dtlb_entry )
),
TP_fast_assign(
__entry->ps_base = ps_base;
__entry->ps_frame = *ps_frame;
__entry->dtlb_entry = HYPERVISOR_mmu_probe((e2k_addr_t)ps_base,
KVM_MMU_PROBE_ENTRY);
),
TP_printk(" %px (dtlb 0x%016lx) : 0x%016lx 0x%016lx",
__entry->ps_base, __entry->dtlb_entry,
__entry->ps_frame.word_lo,
__entry->ps_frame.word_hi)
);
/*
 * guest_chain_stack_frame - trace one chain stack frame.
 * Records the frame address @pcs_base, a copy of the frame contents
 * (*@pcs_frame, printed as CR0_lo, CR0_hi, CR1_lo, CR1_hi) and the result
 * of HYPERVISOR_mmu_probe() for @pcs_base with KVM_MMU_PROBE_ENTRY.
 */
TRACE_EVENT(
guest_chain_stack_frame,
TP_PROTO(e2k_mem_crs_t *pcs_base, e2k_mem_crs_t *pcs_frame),
TP_ARGS(pcs_base, pcs_frame),
TP_STRUCT__entry(
__field( e2k_mem_crs_t *, pcs_base )
__field_struct( e2k_mem_crs_t, pcs_frame )
__field( pgprotval_t, dtlb_entry )
),
TP_fast_assign(
__entry->pcs_base = pcs_base;
__entry->pcs_frame = *pcs_frame;
__entry->dtlb_entry = HYPERVISOR_mmu_probe((e2k_addr_t)pcs_base,
KVM_MMU_PROBE_ENTRY);
),
TP_printk(" %px (dtlb 0x%016lx) : 0x%016llx 0x%016llx "
"0x%016llx 0x%016llx",
__entry->pcs_base, __entry->dtlb_entry,
__entry->pcs_frame.cr0_lo.CR0_lo_half,
__entry->pcs_frame.cr0_hi.CR0_hi_half,
__entry->pcs_frame.cr1_lo.CR1_lo_half,
__entry->pcs_frame.cr1_hi.CR1_hi_half)
);
/*
 * guest_va_tlb_state - trace the TLB state for a virtual @address.
 * For TLB sets 0..3 and for set 3 queried with the huge flag (printed as
 * set #H) the tag and the entry are requested from the host through
 * HYPERVISOR_get_tlb_set_tag() / HYPERVISOR_get_tlb_set_entry().
 * Also recorded: the HYPERVISOR_mmu_probe() result for @address and the
 * values returned by HYPERVISOR_get_host_mmu_pptb() and
 * HYPERVISOR_get_host_mmu_pid().
 */
TRACE_EVENT(
guest_va_tlb_state,
TP_PROTO(e2k_addr_t address),
TP_ARGS(address),
TP_STRUCT__entry(
__field( e2k_addr_t, address )
__field( tlb_tag_t, set0_tag )
__field_struct( pte_t, set0_entry )
__field( tlb_tag_t, set1_tag )
__field_struct( pte_t, set1_entry )
__field( tlb_tag_t, set2_tag )
__field_struct( pte_t, set2_entry )
__field( tlb_tag_t, set3_tag )
__field_struct( pte_t, set3_entry )
__field( tlb_tag_t, setH_tag )
__field_struct( pte_t, setH_entry )
__field( u64, dtlb_entry )
__field( unsigned long, mmu_pptb )
__field( unsigned long, mmu_pid )
),
TP_fast_assign(
__entry->address = address;
__entry->set0_tag = HYPERVISOR_get_tlb_set_tag(address, 0, false);
pte_val(__entry->set0_entry) =
HYPERVISOR_get_tlb_set_entry(address, 0, false);
__entry->set1_tag = HYPERVISOR_get_tlb_set_tag(address, 1, false);
pte_val(__entry->set1_entry) =
HYPERVISOR_get_tlb_set_entry(address, 1, false);
__entry->set2_tag = HYPERVISOR_get_tlb_set_tag(address, 2, false);
pte_val(__entry->set2_entry) =
HYPERVISOR_get_tlb_set_entry(address, 2, false);
__entry->set3_tag = HYPERVISOR_get_tlb_set_tag(address, 3, false);
pte_val(__entry->set3_entry) =
HYPERVISOR_get_tlb_set_entry(address, 3, false);
/* the "H" values are read from set 3 with the huge flag set */
__entry->setH_tag = HYPERVISOR_get_tlb_set_tag(address, 3, true);
pte_val(__entry->setH_entry) =
HYPERVISOR_get_tlb_set_entry(address, 3, true);
__entry->dtlb_entry = HYPERVISOR_mmu_probe(address,
KVM_MMU_PROBE_ENTRY);
__entry->mmu_pptb = HYPERVISOR_get_host_mmu_pptb();
__entry->mmu_pid = HYPERVISOR_get_host_mmu_pid();
),
TP_printk(" 0x%016lx : dtlb 0x%016llx U_PPTB 0x%lx PID 0x%lx\n"
" TLB set #0 tag 0x%016lx entry 0x%016lx\n"
" TLB set #1 tag 0x%016lx entry 0x%016lx\n"
" TLB set #2 tag 0x%016lx entry 0x%016lx\n"
" TLB set #3 tag 0x%016lx entry 0x%016lx\n"
" TLB set #H tag 0x%016lx entry 0x%016lx",
__entry->address, __entry->dtlb_entry,
__entry->mmu_pptb, __entry->mmu_pid,
__entry->set0_tag, pte_val(__entry->set0_entry),
__entry->set1_tag, pte_val(__entry->set1_entry),
__entry->set2_tag, pte_val(__entry->set2_entry),
__entry->set3_tag, pte_val(__entry->set3_entry),
__entry->setH_tag, pte_val(__entry->setH_entry)
)
);
#endif /* _KVM_GUEST_TRACE_COPY_HW_STACKS_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../arch/e2k/include/asm/kvm/guest
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace-hw-stacks
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@ -205,8 +205,10 @@ static inline unsigned long generic_hypercall6(unsigned long nr,
/* time (cycles) */
#define KVM_HCALL_GET_GUEST_RUNNING_TIME 6 /* get running time of guest */
/* VCPU at cycles */
#define KVM_HCALL_GET_VCPU_START_THREAD 8 /* register on host the guest */
/* kernel VCPU booting thread */
#define KVM_HCALL_GET_HOST_MMU_PPTB 7 /* get the host MMU PPTB register */
/* state */
#define KVM_HCALL_GET_TLB_SET_TAG 8 /* get tag of TLB line set */
#define KVM_HCALL_GET_TLB_SET_ENTRY 9 /* get entry of TLB line set */
#define KVM_HCALL_UPDATE_PSP_HI 10 /* write updated value to */
/* PSP_hi register */
#define KVM_HCALL_UPDATE_PCSP_HI 11 /* write updated value to */
@ -215,6 +217,8 @@ static inline unsigned long generic_hypercall6(unsigned long nr,
/* guest as task */
#define KVM_HCALL_UPDATE_WD_PSIZE 13 /* write updated psize field */
/* to the WD register */
#define KVM_HCALL_GET_HOST_MMU_PID 14 /* get the host MMU PID register */
/* state */
#define KVM_HCALL_MOVE_TAGGED_DATA 15 /* move quad value from to */
#define KVM_HCALL_UNFREEZE_TRAPS 16 /* unfreeze TIRs & trap */
/* cellar */
@ -303,9 +307,27 @@ HYPERVISOR_get_guest_running_time(void)
}
static inline unsigned long
HYPERVISOR_get_vcpu_start_thread(void)
HYPERVISOR_get_tlb_set_tag(e2k_addr_t va, int set_no, bool huge)
{
return light_hypercall0(KVM_HCALL_GET_VCPU_START_THREAD);
return light_hypercall3(KVM_HCALL_GET_TLB_SET_TAG, va, set_no, huge);
}
/*
 * Get entry of TLB line set: issues the light hypercall
 * KVM_HCALL_GET_TLB_SET_ENTRY with (@va, @set_no, @huge) and returns
 * its result.
 */
static inline unsigned long
HYPERVISOR_get_tlb_set_entry(e2k_addr_t va, int set_no, bool huge)
{
return light_hypercall3(KVM_HCALL_GET_TLB_SET_ENTRY, va, set_no, huge);
}
/*
 * Get the host MMU PPTB register state: issues the argument-less light
 * hypercall KVM_HCALL_GET_HOST_MMU_PPTB and returns its result.
 */
static inline unsigned long
HYPERVISOR_get_host_mmu_pptb(void)
{
return light_hypercall0(KVM_HCALL_GET_HOST_MMU_PPTB);
}
/*
 * Get the host MMU PID register state: issues the argument-less light
 * hypercall KVM_HCALL_GET_HOST_MMU_PID and returns its result.
 */
static inline unsigned long
HYPERVISOR_get_host_mmu_pid(void)
{
return light_hypercall0(KVM_HCALL_GET_HOST_MMU_PID);
}
static inline unsigned long
@ -611,6 +633,8 @@ HYPERVISOR_switch_to_expanded_guest_chain_stack(long delta_size,
/* in page tables and flush tlb */
#define KVM_HCALL_SYNC_ADDR_RANGE 135 /* sync ptes in page */
/* tables without flushing tlb */
#define KVM_HCALL_GET_SPT_TRANSLATION 137 /* get full translation of guest */
/* address at shadow PTs */
#define KVM_HCALL_RECOVERY_FAULTED_TAGGED_STORE 141
/* recovery faulted store */
/* tagged value operations */
@ -1252,10 +1276,11 @@ HYPERVISOR_guest_csd_lock_try_wait(void *lock)
}
static inline unsigned long
HYPERVISOR_pt_atomic_update(unsigned long gpa, void __user *old_gpte,
unsigned atomic_op, unsigned long prot_mask)
HYPERVISOR_pt_atomic_update(int gmmid_nr, unsigned long gpa,
void __user *old_gpte,
unsigned atomic_op, unsigned long prot_mask)
{
return generic_hypercall4(KVM_HCALL_PT_ATOMIC_UPDATE,
return generic_hypercall5(KVM_HCALL_PT_ATOMIC_UPDATE, (int)gmmid_nr,
gpa, (unsigned long)old_gpte, atomic_op, prot_mask);
}
@ -1400,6 +1425,28 @@ HYPERVISOR_host_printk(char *msg, int size)
return generic_hypercall2(KVM_HCALL_HOST_PRINTK, (unsigned long)msg,
(unsigned long)size);
}
/*
 * The guest virtual address info of full track of translation
 * at the host shadow PTs.
 * Filled in through HYPERVISOR_get_spt_translation(); which of the entries
 * below are meaningful is presumably determined by pt_levels - confirm
 * against the host side implementation.
 */
typedef struct mmu_spt_trans {
int pt_levels; /* the last significant level of PT */
unsigned long pgd; /* entry of the PGD level */
unsigned long pud; /* entry of the PUD level */
unsigned long pmd; /* entry of the PMD level */
unsigned long pte; /* entry of the PTE level */
} mmu_spt_trans_t;
/*
 * Get full translation of guest @address at shadow PTs: issues the generic
 * hypercall KVM_HCALL_GET_SPT_TRANSLATION passing @address and the address
 * of @trans_info, and returns the hypercall result.
 */
static inline unsigned long
HYPERVISOR_get_spt_translation(e2k_addr_t address,
mmu_spt_trans_t __user *trans_info)
{
return generic_hypercall2(KVM_HCALL_GET_SPT_TRANSLATION, address,
(unsigned long)trans_info);
}
static inline unsigned long
HYPERVISOR_print_guest_kernel_ptes(e2k_addr_t address)
{

View File

@ -124,6 +124,16 @@ static inline void reset_spt_gpa_fault(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.spt_gpa_fault = false;
}
/* Return the value read by NATIVE_READ_MMU_U_PPTB_REG(). */
static inline unsigned long get_mmu_u_pptb_reg(void)
{
return NATIVE_READ_MMU_U_PPTB_REG();
}
/* Return the value read by NATIVE_READ_MMU_PID_REG(). */
static inline unsigned long get_mmu_pid_reg(void)
{
return NATIVE_READ_MMU_PID_REG();
}
static inline hpa_t
kvm_get_gp_phys_root(struct kvm_vcpu *vcpu)
{
@ -286,6 +296,11 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
return (struct kvm_mmu_page *)page_private(page);
}
extern void kvm_get_spt_translation(struct kvm_vcpu *vcpu, e2k_addr_t address,
pgdval_t *pgd, pudval_t *pud, pmdval_t *pmd,
pteval_t *pte, int *pt_level);
extern unsigned long kvm_get_gva_to_hva(struct kvm_vcpu *vcpu, gva_t gva);
static inline gpa_t kvm_hva_to_gpa(struct kvm *kvm, unsigned long hva)
{
struct kvm_memslots *slots;

View File

@ -31,8 +31,8 @@ struct kvm_page_track_notifier_node {
* @new: the data was written to the address.
* @bytes: the written length.
*/
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
void (*track_write)(struct kvm_vcpu *vcpu, struct gmm_struct *gmm,
gpa_t gpa, const u8 *new, int bytes);
/*
* It is called when memory slot is being moved or removed
* users can drop write-protection for the pages in that memory slot
@ -68,8 +68,8 @@ kvm_page_track_register_notifier(struct kvm *kvm,
void
kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n);
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
void kvm_page_track_write(struct kvm_vcpu *vcpu, struct gmm_struct *gmm,
gpa_t gpa, const u8 *new, int bytes);
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#else /* ! CONFIG_KVM_HV_MMU */
static inline void kvm_page_track_init(struct kvm *kvm)

View File

@ -15,16 +15,6 @@
#include <asm/kvm/page.h>
#include <asm/kvm/switch.h>
/*
 * DebugGUST() - debug print for copying of guest user stacks.
 * Expands to pr_info() prefixed with the name of the calling function;
 * the print is executed only if DEBUG_KVM_GUEST_STACKS_MODE is non-zero
 * (it is defined as 0 here).
 */
#undef DEBUG_KVM_GUEST_STACKS_MODE
#undef DebugGUST
#define DEBUG_KVM_GUEST_STACKS_MODE 0 /* guest user stacks */
/* copy debug */
#define DebugGUST(fmt, args...) \
({ \
if (DEBUG_KVM_GUEST_STACKS_MODE) \
pr_info("%s(): " fmt, __func__, ##args); \
})
extern void kvm_clear_host_thread_info(thread_info_t *ti);
extern gthread_info_t *create_guest_start_thread_info(struct kvm_vcpu *vcpu);
extern int kvm_resume_vm_thread(void);
@ -233,416 +223,6 @@ void go2guest(long fn, bool priv_guest);
INIT_HOST_VCPU_STATE_GREG_COPY(__ti, vcpu); \
})
/*
 * Fill regs->g_stacks, the stacks state used for injection into the guest,
 * unless it has been prepared already (regs->g_stacks_valid).
 *
 * Stack bases and sizes are taken from the guest thread info of @vcpu,
 * while pshtp/pcshtp are taken from regs->stacks.  Afterwards the state is
 * marked valid, not active and not requiring injection.
 */
static inline void
prepare_pv_vcpu_inject_stacks(struct kvm_vcpu *vcpu, pt_regs_t *regs)
{
	gthread_info_t *gti = pv_vcpu_get_gti(vcpu);
	e2k_stacks_t *inject = &regs->g_stacks;
	e2k_stacks_t *trapped = &regs->stacks;

	/* already prepared */
	if (regs->g_stacks_valid)
		return;

	/*
	 * all stacks at empty state, because of guest user recursion
	 * of trap/system calls can not be
	 */
	inject->usd_lo = gti->g_usd_lo;
	inject->usd_hi = gti->g_usd_hi;
	inject->top = gti->g_sbr.SBR_base;
	inject->psp_lo = gti->g_psp_lo;
	inject->psp_hi = gti->g_psp_hi;
	inject->pcsp_lo = gti->g_pcsp_lo;
	inject->pcsp_hi = gti->g_pcsp_hi;

	/* pshtp & pcshtp from guest user stack real state upon trap/syscall */
	inject->pshtp = trapped->pshtp;
	inject->pcshtp = trapped->pcshtp;

	regs->g_stacks_valid = true;
	regs->g_stacks_active = false;
	regs->need_inject = false;
}
/*
 * Debug aid: when EMULATE_EMPTY_CHAIN_STACK is defined, the spilled part of
 * the guest user chain stack is pushed out of the kernel chain stack so that
 * PCSHTP becomes 0.  The macro is undefined right here, so the empty variant
 * at the bottom is the one that gets compiled.
 */
#undef EMULATE_EMPTY_CHAIN_STACK /* only to debug */
#ifdef EMULATE_EMPTY_CHAIN_STACK
static __always_inline void
pv_vcpu_emulate_empty_chain_staks(struct kvm_vcpu *vcpu, pt_regs_t *regs,
e2k_stacks_t *stacks, bool guest_user)
{
e2k_pcshtp_t pcshtp;
unsigned long flags;
e2k_pcsp_lo_t g_pcsp_lo, k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *g_cframe;
e2k_mem_crs_t *k_crs;
int ret;
pcshtp = stacks->pcshtp;
/* act only for guest user and a spilled part of at most 0x40 bytes */
if (!(guest_user && pcshtp <= 0x40))
return;
g_pcsp_lo = regs->stacks.pcsp_lo;
g_pcsp_hi = regs->stacks.pcsp_hi;
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
k_pcsp_lo = NATIVE_NV_READ_PCSP_LO_REG();
/* the kernel chain stack must hold exactly the spilled user part */
BUG_ON(AS(k_pcsp_hi).ind != pcshtp);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
/* destination: the top pcshtp bytes below the guest chain stack index */
g_cframe = (e2k_mem_crs_t __user *) (AS(g_pcsp_lo).base +
AS(g_pcsp_hi).ind - pcshtp);
ret = user_hw_stack_frames_copy(g_cframe, k_crs, pcshtp, regs,
k_pcsp_hi.PCSP_hi_ind, true);
if (ret) {
pr_err("%s(): copy to user stack failed\n", __func__);
BUG_ON(true);
}
/* drop the copied part from the kernel stack and report empty PCSHTP */
k_pcsp_hi.PCSP_hi_ind -= pcshtp;
pcshtp = 0;
regs->stacks.pcshtp = pcshtp;
stacks->pcshtp = pcshtp;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
raw_all_irq_restore(flags);
}
#else /* !EMULATE_EMPTY_CHAIN_STACK */
/* emulation is disabled: nothing to do */
static __always_inline void
pv_vcpu_emulate_empty_chain_staks(struct kvm_vcpu *vcpu, pt_regs_t *regs,
e2k_stacks_t *stacks, bool guest_user)
{
}
#endif /* EMULATE_EMPTY_CHAIN_STACK */
/**
 * pv_vcpu_user_hw_stacks_copy - copy the part of guest hardware stacks that
 *	has been SPILLed to host kernel stacks back to guest space
 * @regs - saved guest user stack registers
 * @stacks - guest stacks registers state describing the destination stacks;
 *	its pcsp_hi/psp_hi indexes are advanced by the copied size
 *	when @guest_user is true
 * @cur_window_q - size of current window in procedure stack
 * @guest_user - true: destination starts at the current stack index;
 *	false: the stack index has already been incremented by
 *	PCSHTP/PSHTP, so destination is moved back by that size
 *
 * In the expected (likely) case all of the guest user's stacks part has
 * already been copied to guest kernel stacks, so the sizes calculated by
 * get_pcs_copy_size() and get_ps_copy_size() are not positive and there is
 * nothing to copy here.
 *
 * Returns 0 on success or if there is nothing to copy, otherwise the error
 * returned by user_hw_stack_frames_copy().
 */
static __always_inline int
pv_vcpu_user_hw_stacks_copy(pt_regs_t *regs, e2k_stacks_t *stacks,
u64 cur_window_q, bool guest_user)
{
e2k_psp_lo_t g_psp_lo = stacks->psp_lo,
k_psp_lo = current_thread_info()->k_psp_lo;
e2k_psp_hi_t g_psp_hi = stacks->psp_hi;
e2k_pcsp_lo_t g_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi = stacks->pcsp_hi;
s64 g_pshtp_size, g_pcshtp_size, ps_copy_size, pcs_copy_size;
int ret;
DebugUST("guest kernel chain state: base 0x%llx ind 0x%x size 0x%x\n",
g_pcsp_lo.PCSP_lo_base, g_pcsp_hi.PCSP_hi_ind,
g_pcsp_hi.PCSP_hi_size);
DebugUST("guest kernel proc state: base 0x%llx ind 0x%x size 0x%x\n",
g_psp_lo.PSP_lo_base, g_psp_hi.PSP_hi_ind,
g_psp_hi.PSP_hi_size);
g_pshtp_size = GET_PSHTP_MEM_INDEX(stacks->pshtp);
g_pcshtp_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
DebugUST("guest kernel chain stack PCSHTP 0x%llx, "
"proc stack PSHTP 0x%llx cur window 0x%llx\n",
g_pcshtp_size, g_pshtp_size, cur_window_q);
/*
 * FIXME: the current implementation of the guest user signal handler
 * injection uses direct copying to guest hardware stacks.
 * This is a bad decision and needs to be corrected; until then
 * the following check stays disabled:
KVM_BUG_ON(is_paging(current_thread_info()->vcpu) &&
(g_psp_lo.PSP_lo_base < GUEST_TASK_SIZE ||
g_pcsp_lo.PCSP_lo_base < GUEST_TASK_SIZE));
 */
/*
 * Calculate size of user's part to copy from kernel stacks
 * into guest kernel stacks
 */
pcs_copy_size = get_pcs_copy_size(g_pcshtp_size);
ps_copy_size = get_ps_copy_size(cur_window_q, g_pshtp_size);
/* Make sure there is enough space in CF for the FILL */
BUG_ON((E2K_MAXCR_q - 4) * 16 < E2K_CF_MAX_FILL);
DebugUST("to copy chain stack 0x%llx, proc stack 0x%llx\n",
pcs_copy_size, ps_copy_size);
/* Fast path: nothing is left in host kernel stacks to copy */
if (likely(pcs_copy_size <= 0 && ps_copy_size <= 0))
return 0;
/* Chain stack: copy from the host kernel stack base to the guest stack */
if (unlikely(pcs_copy_size > 0)) {
e2k_pcsp_hi_t k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
void __user *dst;
void *src;
/* Since SPILL'ed guest user data will be copied to guest */
/* kernel stacks, there cannot be any overflow of the user's */
/* hardware stack. */
if (unlikely(AS(g_pcsp_hi).ind > AS(g_pcsp_hi).size)) {
pr_err("%s(): guest kernel chain stack overflow "
"(out of memory?): ind 0x%x size 0x%x\n",
__func__, g_pcsp_hi.PCSP_hi_ind,
g_pcsp_hi.PCSP_hi_size);
KVM_BUG_ON(true);
}
dst = (void __user *)(g_pcsp_lo.PCSP_lo_base +
g_pcsp_hi.PCSP_hi_ind);
if (!guest_user) {
/* stack index has been incremented on PCSHTP */
dst -= g_pcshtp_size;
}
src = (void *)k_pcsp_lo.PCSP_lo_base;
ret = user_hw_stack_frames_copy(dst, src, pcs_copy_size, regs,
k_pcsp_hi.PCSP_hi_ind, true);
if (ret)
return ret;
if (guest_user) {
/* account the copied frames in the saved guest chain stack index */
g_pcsp_hi.PCSP_hi_ind += pcs_copy_size;
stacks->pcsp_hi = g_pcsp_hi;
DebugGUST("guest user chain stack frames copied from "
"host %px to guest kernel from %px size 0x%llx "
"PCSP.ind 0x%x\n",
src, dst, pcs_copy_size, g_pcsp_hi.PCSP_hi_ind);
}
}
/* Procedure stack: same scheme as for the chain stack above */
if (unlikely(ps_copy_size > 0)) {
e2k_psp_hi_t k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
void __user *dst;
void *src;
/* Since SPILL'ed guest user data will be copied to guest */
/* kernel stacks, there cannot be any overflow of the user's */
/* hardware stack. */
if (unlikely(AS(g_psp_hi).ind > AS(g_psp_hi).size)) {
pr_err("%s(): guest kernel proc stack overflow "
"(out of memory?): ind 0x%x size 0x%x\n",
__func__, g_psp_hi.PSP_hi_ind,
g_psp_hi.PSP_hi_size);
KVM_BUG_ON(true);
}
dst = (void __user *)(g_psp_lo.PSP_lo_base +
g_psp_hi.PSP_hi_ind);
if (!guest_user) {
/* stack index has been incremented on PSHTP */
dst -= g_pshtp_size;
}
src = (void *)k_psp_lo.PSP_lo_base;
ret = user_hw_stack_frames_copy(dst, src, ps_copy_size, regs,
k_psp_hi.PSP_hi_ind, false);
if (ret)
return ret;
if (guest_user) {
/* account the copied data in the saved guest procedure stack index */
g_psp_hi.PSP_hi_ind += ps_copy_size;
stacks->psp_hi = g_psp_hi;
DebugGUST("guest user proc stack frames copied from "
"host %px to guest kernel from %px size 0x%llx "
"PSP.ind 0x%x\n",
src, dst, ps_copy_size, g_psp_hi.PSP_hi_ind);
}
}
return 0;
}
/**
 * pv_vcpu_user_hw_stacks_prepare - prepare guest user hardware stacks
 *	that have been SPILLed to kernel back
 *	to guest user space
 * @vcpu - VCPU the trap or system call is being handled for
 * @regs - saved guest user stack registers
 * @cur_window_q - size of current window in procedure stack
 * @from - flags describing the caller; if FROM_PV_VCPU_MODE is set and
 *	the trap/syscall came from guest user, nothing is done here
 * @syscall - true if called upon direct system call exit (no signal handlers)
 *
 * This does two things:
 *
 * 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
 * - user signal handler had pcshtp==0x20 before return to sigreturn()
 * - user context had pcshtp==0x20 before return to makecontext_trampoline()
 * - chain stack underflow happened
 * So it is possible in sigreturn() and traps, but not in system calls.
 * If we are using the trick with return to FILL user hardware stacks then
 * we must have a frame in chain stack to return to. So in this case kernel's
 * chain stack is moved up by one frame (0x20 bytes).
 * We also fill the new frame with actual user data and update stacks->pcshtp,
 * this is needed to keep the coherent state where saved stacks->pcshtp values
 * shows how much data from user space has been spilled to kernel space.
 *
 * 2) It is not possible to always FILL all of user data that have been
 * SPILLed to kernel stacks. So we manually copy the leftovers that can
 * not be FILLed to user space.
 * This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
 * is signals: if a signal arrives after copying then it must see a coherent
 * state where saved stacks->pshtp and stacks->pcshtp values show how much
 * data from user space has been spilled to kernel space.
 *
 * On failure the current task is killed: force_sig(SIGKILL) if step 1
 * cannot provide a frame to return to, do_exit(SIGKILL) if the copy of
 * step 2 fails.
 */
static __always_inline void
pv_vcpu_user_hw_stacks_prepare(struct kvm_vcpu *vcpu, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
e2k_stacks_t *stacks;
e2k_pcshtp_t pcshtp;
bool guest_user;
bool paging = is_paging(vcpu);
int ret;
/* without paging the trap/syscall is never treated as guest user's */
if (likely(paging)) {
guest_user = !!(syscall || !pv_vcpu_trap_on_guest_kernel(regs));
} else {
guest_user = false;
}
if (guest_user) {
if (from & FROM_PV_VCPU_MODE) {
/* all preparation has been made */
/* by host & guest handler */
return;
}
/* trap on/syscall from guest user, so regs keeps user */
/* registers state and it need use guest kernel stacks */
/* in empty state to handle this trap/syscall */
if (!regs->g_stacks_valid) {
prepare_pv_vcpu_inject_stacks(vcpu, regs);
}
stacks = &regs->g_stacks;
} else {
/* trap on guest kernel, so regs already points to guest */
/* kernel stacks and trap will be handled by host */
/* same as other user's processes traps */
stacks = &regs->stacks;
}
/* only to debug on simulator : pcshtp == 0 */
pv_vcpu_emulate_empty_chain_staks(vcpu, regs, stacks, guest_user);
pcshtp = stacks->pcshtp;
DebugUST("guest kernel chain stack state: base 0x%llx ind 0x%x "
"size 0x%x\n",
stacks->pcsp_lo.PCSP_lo_base,
stacks->pcsp_hi.PCSP_hi_ind,
stacks->pcsp_hi.PCSP_hi_size);
DebugUST("host kernel chain stack state: base 0x%llx ind 0x%x "
"size 0x%x\n",
NATIVE_NV_READ_PCSP_LO_REG().PCSP_lo_base,
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_size);
DebugUST("guest kernel chain stack size to fill PCSHTP 0x%x\n",
pcshtp);
/*
 * 1) Make sure there is free space in kernel chain stack to return to
 */
if (!syscall && pcshtp == 0 && !guest_user) {
unsigned long flags;
e2k_pcsp_lo_t g_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi = stacks->pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *g_cframe;
e2k_mem_crs_t *k_crs;
/* NOTE: shadows the function-scope 'ret'; stays -EINVAL if */
/* there is no guest frame below the current index to copy */
int ret = -EINVAL;
raw_all_irq_save(flags);
NATIVE_FLUSHC;
/* host kernel chain stack must be empty: reserve its bottom frame */
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
BUG_ON(AS(k_pcsp_hi).ind);
AS(k_pcsp_hi).ind += SZ_OF_CR;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
g_cframe = (e2k_mem_crs_t __user *) (AS(g_pcsp_lo).base +
AS(g_pcsp_hi).ind);
/* fill the reserved frame with the last frame of the guest stack */
if ((u64) g_cframe > (u64) AS(g_pcsp_lo).base) {
ret = __copy_user_to_current_hw_stack(k_crs,
g_cframe - 1, sizeof(*k_crs), regs, true);
}
raw_all_irq_restore(flags);
/* Can happen if application returns until runs out of
 * chain stack or there is no free memory for stacks.
 * There is no user stack to return to - die. */
if (ret) {
E2K_LMS_HALT_OK;
pr_err("%s(): SIGKILL. %s\n",
__func__,
(ret == -EINVAL) ?
"tried to return to kernel"
:
"ran into Out-of-Memory on user stacks");
force_sig(SIGKILL);
return;
}
DebugUST("copy guest user chain frame from %px to kernel "
"bottom from %px\n",
g_cframe - 1, k_crs);
if (AS(g_pcsp_hi).ind < SZ_OF_CR) {
pr_err("%s(): guest kernel chain stack underflow\n",
__func__);
KVM_BUG_ON(true);
}
/* one frame of guest data is now kept in the host kernel stack */
pcshtp = SZ_OF_CR;
stacks->pcshtp = pcshtp;
DebugUST("guest kernel chain stack to FILL PCSHTP "
"set to 0x%x\n",
stacks->pcshtp);
} else if (!syscall && pcshtp == 0 && guest_user) {
e2k_pcsp_hi_t k_pcsp_hi;
unsigned long flags;
/* set flag for unconditional injection to do not copy */
/* from guest user space */
regs->need_inject = true;
/* reserve one bottom frames for trampoline */
/* the guest handler replaces guest user trapped frame */
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
BUG_ON(k_pcsp_hi.PCSP_hi_ind);
k_pcsp_hi.PCSP_hi_ind += 1 * SZ_OF_CR;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
raw_all_irq_restore(flags);
}
/*
 * 2) Copy user data that cannot be FILLed
 */
ret = pv_vcpu_user_hw_stacks_copy(regs, stacks, cur_window_q,
guest_user);
/* the stacks cannot be restored, so the task cannot continue */
if (unlikely(ret))
do_exit(SIGKILL);
}
/*
 * Same as for native kernel without virtualization support:
 * all arguments are passed unchanged to native_user_hw_stacks_copy()
 * and its result is returned as is.
 */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
return native_user_hw_stacks_copy(stacks, regs, cur_window_q, copy_full);
}
/*
 * Prepare user hardware stacks on a host kernel: dispatch between
 * the native and the paravirtualized guest implementation.
 *
 * @stacks - user stacks registers state (used only by the native path)
 * @regs - saved registers of the interrupted context
 * @cur_window_q - size of current window in procedure stack
 * @from - flags describing the caller, passed through unchanged
 * @syscall - non-zero if called upon system call exit, passed through
 *
 * If the interception emulation flag is not set in @regs, the trap or
 * system call came from a host user process and the native handler is
 * used. Otherwise the VCPU of the current thread is used and the work is
 * done by pv_vcpu_user_hw_stacks_prepare(), which takes the guest stacks
 * from @regs rather than from @stacks.
 */
static __always_inline void
host_user_hw_stacks_prepare(struct e2k_stacks *stacks, pt_regs_t *regs,
		u64 cur_window_q, enum restore_caller from, int syscall)
{
	struct kvm_vcpu *vcpu;

	if (likely(!kvm_test_intc_emul_flag(regs))) {
		/* trap on/syscall from host user processes */
		native_user_hw_stacks_prepare(stacks, regs,
				cur_window_q, from, syscall);
		/* plain return: a void function must not return an expression */
		return;
	}

	vcpu = current_thread_info()->vcpu;
	KVM_BUG_ON(vcpu == NULL);
	pv_vcpu_user_hw_stacks_prepare(vcpu, regs, cur_window_q, from, syscall);
}
static __always_inline void
host_exit_to_usermode_loop(struct pt_regs *regs, bool syscall, bool has_signal)
{

View File

@ -101,8 +101,7 @@ native_guest_syscall_enter(struct pt_regs *regs)
#ifdef CONFIG_VIRTUALIZATION
/*
* For interceptions just switch actual registers with saved values
* in 'sw_ctxt'.
* For interceptions just switch actual registers with saved values in 'sw_ctxt'.
*
* For hypercalls:
* 1) Enter hypercall.
@ -128,8 +127,7 @@ static inline void kvm_switch_stack_regs(struct kvm_sw_cpu_context *sw_ctxt,
AW(sbr) = NATIVE_NV_READ_SBR_REG_VALUE();
}
NATIVE_NV_WRITE_USBR_USD_REG(sw_ctxt->sbr,
sw_ctxt->usd_hi, sw_ctxt->usd_lo);
NATIVE_NV_WRITE_USBR_USD_REG(sw_ctxt->sbr, sw_ctxt->usd_hi, sw_ctxt->usd_lo);
if (ctxt_save) {
KVM_BUG_ON(sw_ctxt->saved.valid);
@ -225,14 +223,27 @@ static inline void kvm_switch_mmu_regs(struct kvm_sw_cpu_context *sw_ctxt,
static inline void kvm_switch_to_guest_mmu_pid(struct kvm_vcpu *vcpu)
{
mm_context_t *gmm_context;
unsigned long mask;
gmm_context = pv_vcpu_get_gmm_context(vcpu);
reload_mmu_pid(gmm_context, smp_processor_id());
mask = get_mmu_pid(gmm_context, smp_processor_id());
reload_context_mask(mask);
}
/*
 * Return the per-CPU context mask entry (cpumsk[]) of the guest mm context
 * of @vcpu for the CPU this code is currently running on.
 */
static inline unsigned long kvm_get_guest_mmu_pid(struct kvm_vcpu *vcpu)
{
	mm_context_t *guest_ctx = pv_vcpu_get_gmm_context(vcpu);
	int this_cpu = smp_processor_id();

	return guest_ctx->cpumsk[this_cpu];
}
static inline void kvm_switch_to_host_mmu_pid(struct mm_struct *mm)
{
reload_context(mm, smp_processor_id());
unsigned long mask;
mask = get_mmu_context(mm, smp_processor_id());
reload_context_mask(mask);
}
static inline void kvm_switch_debug_regs(struct kvm_sw_cpu_context *sw_ctxt,
@ -479,6 +490,8 @@ static inline void host_guest_enter(struct thread_info *ti,
if (flags & DEBUG_REGS_SWITCH)
kvm_switch_debug_regs(sw_ctxt, true);
KVM_BUG_ON(vcpu->is_hv && !NATIVE_READ_MMU_US_CL_D());
/* Switch data stack after all function calls */
if (flags & USD_CONTEXT_SWITCH) {
if (!(flags & FROM_HYPERCALL_SWITCH) || !vcpu->is_hv) {
@ -506,6 +519,8 @@ static inline void host_guest_enter_light(struct thread_info *ti,
kvm_switch_cu_regs(sw_ctxt);
KVM_BUG_ON(vcpu->is_hv && !NATIVE_READ_MMU_US_CL_D());
/* Switch data stack after all function calls */
if (!from_sdisp) {
if (!vcpu->is_hv) {
@ -513,6 +528,7 @@ static inline void host_guest_enter_light(struct thread_info *ti,
} else {
/* restore saved source pointers of host stack */
kvm_switch_stack_regs(sw_ctxt, false, true);
kvm_switch_clw_regs(sw_ctxt, true);
}
}
}
@ -540,6 +556,8 @@ static inline void host_guest_exit(struct thread_info *ti,
}
}
KVM_BUG_ON(vcpu->is_hv && !NATIVE_READ_MMU_US_CL_D());
if (flags & FROM_HYPERCALL_SWITCH) {
/*
* Hypercalls - both hardware and software virtualization
@ -649,6 +667,8 @@ static inline void host_guest_exit_light(struct thread_info *ti,
KVM_BUG_ON(sw_ctxt->in_hypercall);
sw_ctxt->in_hypercall = true;
KVM_BUG_ON(vcpu->is_hv && !NATIVE_READ_MMU_US_CL_D());
HOST_SAVE_KERNEL_GREGS_AS_LIGHT(ti);
ONLY_SET_KERNEL_GREGS(ti);

View File

@ -0,0 +1,23 @@
#ifndef _E2K_KVM_TRACE_DEFS_H_
#define _E2K_KVM_TRACE_DEFS_H_
#include <linux/types.h>
#include <asm/mmu_types.h>
#include <asm/pgtable_def.h>
#include <asm/kvm/mmu.h>
/*
 * Tracing helper: get page table entries of all levels for @address.
 * All arguments are passed unchanged to kvm_get_spt_translation(), which
 * fills @pgd, @pud, @pmd, @pte and @pt_level.
 */
static inline void
trace_kvm_get_va_translation(struct kvm_vcpu *vcpu, e2k_addr_t address,
pgdval_t *pgd, pudval_t *pud, pmdval_t *pmd, pteval_t *pte, int *pt_level)
{
kvm_get_spt_translation(vcpu, address, pgd, pud, pmd, pte, pt_level);
}
/*
 * Tracing helper: convert guest virtual address @gva of @vcpu to a host
 * virtual address; returns the result of kvm_get_gva_to_hva() as is.
 */
static inline unsigned long
trace_kvm_get_gva_to_hva(struct kvm_vcpu *vcpu, gva_t gva)
{
return kvm_get_gva_to_hva(vcpu, gva);
}
#endif /* _E2K_KVM_TRACE_DEFS_H_ */

View File

@ -0,0 +1,367 @@
#if !defined(_KVM_TRACE_COPY_HW_STACKS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _KVM_TRACE_COPY_HW_STACKS_H
#include <linux/tracepoint.h>
#include <linux/hugetlb.h>
#include <asm/trace-defs.h>
#include <asm/trace_pgtable-v2.h>
#include <asm/trace_pgtable-v6.h>
#include <asm/pgtable_def.h>
#include <asm/kvm/trace-defs.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM host
#ifdef CONFIG_KVM_HOST_MODE
/*
 * host_copy_hw_stack - copy of a hardware stack area from host kernel
 * stack to guest kernel stack.
 *
 * @dst - destination (guest virtual address)
 * @src - source in host kernel stack
 * @size - size of the area in bytes
 * @is_chain - true for chain stack, false for procedure stack
 *
 * Besides the arguments, the event records the page table entries that
 * translate @dst (as guest virtual address) and the page table entries of
 * current->mm for the host virtual address @dst is converted to.
 *
 * Only the VCPU id is stored in the ring buffer, not the VCPU pointer:
 * TP_printk() is evaluated when the trace is read, and by that time
 * the VCPU structure may no longer exist.
 */
TRACE_EVENT(
	host_copy_hw_stack,

	TP_PROTO(void *dst, void *src, unsigned long size, bool is_chain),

	TP_ARGS(dst, src, size, is_chain),

	TP_STRUCT__entry(
		__field(	int,		vcpu_id		)
		__field(	void *,		dst		)
		__field(	void *,		src		)
		__field(	u64,		size		)
		__field(	bool,		is_chain	)
		__field(	pgdval_t,	dst_pgd		)
		__field(	pudval_t,	dst_pud		)
		__field(	pmdval_t,	dst_pmd		)
		__field(	pteval_t,	dst_pte		)
		__field(	int,		dst_pt_level	)
		__field(	pgdval_t,	hva_pgd		)
		__field(	pudval_t,	hva_pud		)
		__field(	pmdval_t,	hva_pmd		)
		__field(	pteval_t,	hva_pte		)
		__field(	int,		hva_pt_level	)
		__field(	unsigned long,	hva		)
	),

	TP_fast_assign(
		struct kvm_vcpu *vcpu = current_thread_info()->vcpu;
		unsigned long hva;

		__entry->vcpu_id = vcpu->vcpu_id;
		__entry->dst = dst;
		__entry->src = src;
		__entry->size = size;
		__entry->is_chain = is_chain;

		/* translation of the destination as guest virtual address */
		trace_kvm_get_va_translation(vcpu, (e2k_addr_t)dst,
			&__entry->dst_pgd, &__entry->dst_pud, &__entry->dst_pmd,
			&__entry->dst_pte, &__entry->dst_pt_level);

		/* translation of the same destination as host virtual address */
		hva = trace_kvm_get_gva_to_hva(vcpu, (gva_t)dst);
		__entry->hva = hva;

		trace_get_va_translation(current->mm, (e2k_addr_t)hva,
			&__entry->hva_pgd, &__entry->hva_pud, &__entry->hva_pmd,
			&__entry->hva_pte, &__entry->hva_pt_level);
	),

	TP_printk("VCPU #%d copy %s stack kernel guest <- kernel host: dst %px "
		"src %px size %llx\n"
		" kernel guest dst GVA %px : pgd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pud 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pmd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pte 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" kernel host dst HVA %px : pgd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pud 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pmd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pte 0x%016lx : %s\n"
		" Access mode: %s%s",
		__entry->vcpu_id,
		(__entry->is_chain) ? "chain" : "procedure",
		__entry->dst,
		__entry->src,
		__entry->size,
		__entry->dst,
		(__entry->dst_pt_level <= E2K_PGD_LEVEL_NUM) ?
			__entry->dst_pgd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pgd,
			__entry->dst_pt_level <= E2K_PGD_LEVEL_NUM),
		(__entry->dst_pt_level <= E2K_PUD_LEVEL_NUM) ?
			__entry->dst_pud : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pud,
			__entry->dst_pt_level <= E2K_PUD_LEVEL_NUM),
		(__entry->dst_pt_level <= E2K_PMD_LEVEL_NUM) ?
			__entry->dst_pmd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pmd,
			__entry->dst_pt_level <= E2K_PMD_LEVEL_NUM),
		(__entry->dst_pt_level <= E2K_PTE_LEVEL_NUM) ?
			__entry->dst_pte : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pte,
			__entry->dst_pt_level <= E2K_PTE_LEVEL_NUM),
		(void *)__entry->hva,
		(__entry->hva_pt_level <= E2K_PGD_LEVEL_NUM) ?
			__entry->hva_pgd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->hva_pgd,
			__entry->hva_pt_level <= E2K_PGD_LEVEL_NUM),
		(__entry->hva_pt_level <= E2K_PUD_LEVEL_NUM) ?
			__entry->hva_pud : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->hva_pud,
			__entry->hva_pt_level <= E2K_PUD_LEVEL_NUM),
		(__entry->hva_pt_level <= E2K_PMD_LEVEL_NUM) ?
			__entry->hva_pmd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->hva_pmd,
			__entry->hva_pt_level <= E2K_PMD_LEVEL_NUM),
		(__entry->hva_pt_level <= E2K_PTE_LEVEL_NUM) ?
			__entry->hva_pte : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->hva_pte,
			__entry->hva_pt_level <= E2K_PTE_LEVEL_NUM)
	)
);
/*
 * host_proc_stack_frame - dump of one procedure stack frame.
 *
 * @ps_base - address that is printed and whose DTLB entry is recorded
 * @ps_frame - frame to dump; its contents are copied into the event,
 *	so both of its words are read when the event is emitted
 */
TRACE_EVENT(
host_proc_stack_frame,
TP_PROTO(kernel_mem_ps_t *ps_base, kernel_mem_ps_t *ps_frame),
TP_ARGS(ps_base, ps_frame),
TP_STRUCT__entry(
__field( kernel_mem_ps_t *, ps_base )
__field_struct( kernel_mem_ps_t, ps_frame )
__field( pgprotval_t, dtlb_entry )
),
TP_fast_assign(
__entry->ps_base = ps_base;
__entry->ps_frame = *ps_frame;
__entry->dtlb_entry = get_MMU_DTLB_ENTRY((e2k_addr_t)ps_base);
),
TP_printk(" %px (dtlb 0x%016lx) : 0x%016lx 0x%016lx",
__entry->ps_base, __entry->dtlb_entry,
__entry->ps_frame.word_lo, __entry->ps_frame.word_hi)
);
/*
 * host_chain_stack_frame - dump of one chain stack frame.
 *
 * @pcs_base - address that is printed and whose DTLB entry is recorded
 * @pcs_frame - frame to dump; its contents (CR0_lo, CR0_hi, CR1_lo, CR1_hi)
 *	are copied into the event when the event is emitted
 */
TRACE_EVENT(
host_chain_stack_frame,
TP_PROTO(e2k_mem_crs_t *pcs_base, e2k_mem_crs_t *pcs_frame),
TP_ARGS(pcs_base, pcs_frame),
TP_STRUCT__entry(
__field( e2k_mem_crs_t *, pcs_base )
__field_struct( e2k_mem_crs_t, pcs_frame )
__field( pgprotval_t, dtlb_entry )
),
TP_fast_assign(
__entry->pcs_base = pcs_base;
__entry->pcs_frame = *pcs_frame;
__entry->dtlb_entry = get_MMU_DTLB_ENTRY((e2k_addr_t)pcs_base);
),
TP_printk(" %px (dtlb 0x%016lx) : 0x%016llx 0x%016llx "
"0x%016llx 0x%016llx",
__entry->pcs_base, __entry->dtlb_entry,
__entry->pcs_frame.cr0_lo.CR0_lo_half,
__entry->pcs_frame.cr0_hi.CR0_hi_half,
__entry->pcs_frame.cr1_lo.CR1_lo_half,
__entry->pcs_frame.cr1_hi.CR1_hi_half)
);
/*
 * host_copy_hva_area - copy of a memory area addressed by host virtual
 * addresses.
 *
 * @dst - destination address
 * @src - source address
 * @size - size of the area in bytes
 *
 * Besides the arguments, the event records the page table entries of
 * current->mm that translate @dst and @src. The first translation dump
 * in the output describes @dst, the second one describes @src.
 */
TRACE_EVENT(
	host_copy_hva_area,

	TP_PROTO(void *dst, void *src, unsigned long size),

	TP_ARGS(dst, src, size),

	TP_STRUCT__entry(
		__field(	void *,		dst		)
		__field(	void *,		src		)
		__field(	u64,		size		)
		__field(	pgdval_t,	dst_pgd		)
		__field(	pudval_t,	dst_pud		)
		__field(	pmdval_t,	dst_pmd		)
		__field(	pteval_t,	dst_pte		)
		__field(	int,		dst_pt_level	)
		__field(	pgdval_t,	src_pgd		)
		__field(	pudval_t,	src_pud		)
		__field(	pmdval_t,	src_pmd		)
		__field(	pteval_t,	src_pte		)
		__field(	int,		src_pt_level	)
	),

	TP_fast_assign(
		__entry->dst = dst;
		__entry->src = src;
		__entry->size = size;

		trace_get_va_translation(current->mm, (e2k_addr_t)dst,
			&__entry->dst_pgd, &__entry->dst_pud, &__entry->dst_pmd,
			&__entry->dst_pte, &__entry->dst_pt_level);
		trace_get_va_translation(current->mm, (e2k_addr_t)src,
			&__entry->src_pgd, &__entry->src_pud, &__entry->src_pmd,
			&__entry->src_pte, &__entry->src_pt_level);
	),

	TP_printk("copy area user guest <- kernel guest: dst %px "
		"src %px size %llx\n"
		" kernel guest dst HVA %px : pgd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pud 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pmd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pte 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		/* the second dump describes the source, so label it as such */
		" kernel guest src HVA %px : pgd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pud 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pmd 0x%016lx : %s\n"
		" Access mode: %s%s\n"
		" pte 0x%016lx : %s\n"
		" Access mode: %s%s",
		__entry->dst,
		__entry->src,
		__entry->size,
		__entry->dst,
		(__entry->dst_pt_level <= E2K_PGD_LEVEL_NUM) ?
			__entry->dst_pgd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pgd,
			__entry->dst_pt_level <= E2K_PGD_LEVEL_NUM),
		(__entry->dst_pt_level <= E2K_PUD_LEVEL_NUM) ?
			__entry->dst_pud : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pud,
			__entry->dst_pt_level <= E2K_PUD_LEVEL_NUM),
		(__entry->dst_pt_level <= E2K_PMD_LEVEL_NUM) ?
			__entry->dst_pmd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pmd,
			__entry->dst_pt_level <= E2K_PMD_LEVEL_NUM),
		(__entry->dst_pt_level <= E2K_PTE_LEVEL_NUM) ?
			__entry->dst_pte : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->dst_pte,
			__entry->dst_pt_level <= E2K_PTE_LEVEL_NUM),
		__entry->src,
		(__entry->src_pt_level <= E2K_PGD_LEVEL_NUM) ?
			__entry->src_pgd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pgd,
			__entry->src_pt_level <= E2K_PGD_LEVEL_NUM),
		(__entry->src_pt_level <= E2K_PUD_LEVEL_NUM) ?
			__entry->src_pud : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pud,
			__entry->src_pt_level <= E2K_PUD_LEVEL_NUM),
		(__entry->src_pt_level <= E2K_PMD_LEVEL_NUM) ?
			__entry->src_pmd : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pmd,
			__entry->src_pt_level <= E2K_PMD_LEVEL_NUM),
		(__entry->src_pt_level <= E2K_PTE_LEVEL_NUM) ?
			__entry->src_pte : -1UL,
		E2K_TRACE_PRINT_PT_FLAGS(__entry->src_pte,
			__entry->src_pt_level <= E2K_PTE_LEVEL_NUM)
	)
);
/*
 * host_hva_area_line - dump of one line (up to four 64-bit words) of
 * a memory area.
 *
 * @hva_base - address of the line; also used to record the DTLB entry
 * @size - number of valid bytes starting at @hva_base
 *
 * Only the words that fit completely into @size bytes are read from
 * memory, the remaining ones are recorded as -1.
 * @size itself is stored in the event but is not printed.
 */
TRACE_EVENT(
host_hva_area_line,
TP_PROTO(u64 *hva_base, u64 size),
TP_ARGS(hva_base, size),
TP_STRUCT__entry(
__field( u64 *, hva_base )
__array( u64, hva_line, 4 )
__field( u64, size )
__field( pgprotval_t, dtlb_entry )
),
TP_fast_assign(
__entry->hva_base = hva_base;
__entry->hva_line[0] =
(size >= 1 * sizeof(u64)) ? hva_base[0] : -1;
__entry->hva_line[1] =
(size >= 2 * sizeof(u64)) ? hva_base[1] : -1;
__entry->hva_line[2] =
(size >= 3 * sizeof(u64)) ? hva_base[2] : -1;
__entry->hva_line[3] =
(size >= 4 * sizeof(u64)) ? hva_base[3] : -1;
__entry->size = size;
__entry->dtlb_entry = get_MMU_DTLB_ENTRY((e2k_addr_t)hva_base);
),
TP_printk(" %px (dtlb 0x%016lx) : 0x%016llx 0x%016llx "
"0x%016llx 0x%016llx",
__entry->hva_base, __entry->dtlb_entry,
__entry->hva_line[0],
__entry->hva_line[1],
__entry->hva_line[2],
__entry->hva_line[3]
)
);
#else /* !CONFIG_KVM_HOST_MODE */
/*
 * Stubs for kernels built without CONFIG_KVM_HOST_MODE: the trace events
 * above are not defined in that case. Each trace_<event>_enabled() stub
 * returns false and each trace_<event>() stub does nothing, so the callers
 * can use them without any #ifdef.
 */
static inline bool trace_host_copy_hw_stack_enabled(void)
{
return false;
}
static inline void
trace_host_copy_hw_stack(void *dst, void *src, unsigned long size, bool is_chain)
{
}
static inline bool trace_host_proc_stack_frame_enabled(void)
{
return false;
}
static inline void
trace_host_proc_stack_frame(kernel_mem_ps_t *ps_base, kernel_mem_ps_t *ps_frame)
{
}
static inline bool trace_host_chain_stack_frame_enabled(void)
{
return false;
}
static inline void
trace_host_chain_stack_frame(e2k_mem_crs_t *pcs_base, e2k_mem_crs_t *pcs_frame)
{
}
static inline bool trace_host_copy_hva_area_enabled(void)
{
return false;
}
static inline void
trace_host_copy_hva_area(void *dst, void *src, unsigned long size)
{
}
static inline bool trace_host_hva_area_line_enabled(void)
{
return false;
}
static inline void
trace_host_hva_area_line(u64 *hva_base, u64 size)
{
}
#endif /* CONFIG_KVM_HOST_MODE */
#endif /* _KVM_TRACE_COPY_HW_STACKS_H */
#ifdef CONFIG_KVM_HOST_MODE
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../arch/e2k/include/asm/kvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace-hw-stacks
/* This part must be outside protection */
#include <trace/define_trace.h>
#endif /* CONFIG_KVM_HOST_MODE */

View File

@ -1090,6 +1090,79 @@ TRACE_EVENT(
TP_printk("Light hypercall exit: %llu\n", __entry->ret)
);
/*
 * guest_switch_to - switch of a VCPU from one guest process to another.
 *
 * @vcpu - VCPU the switch is done on; only its id is stored in the event
 * @gpid_from, @gmmid_from - ids of the guest process and guest mm
 *	being switched from
 * @gpid_to, @gmmid_to - ids of the guest process and guest mm
 *	being switched to
 * @next_gsw - software registers of the process being switched to; its
 *	data stack top, USD, PSP and PCSP registers are recorded
 *
 * The event also records the MMU state at the moment of the call: the
 * values returned by the VCPU's get_vcpu_u_pptb()/get_vcpu_sh_u_vptb(),
 * by kvm_get_space_type_spt_u_root() (printed as "sh_pptb"), by
 * get_mmu_u_pptb_reg() and get_mmu_pid_reg(), and the value of
 * kvm_get_guest_mmu_pid() for this VCPU.
 */
TRACE_EVENT(
guest_switch_to,
TP_PROTO(struct kvm_vcpu *vcpu, int gpid_from, int gmmid_from,
int gpid_to, int gmmid_to, struct sw_regs *next_gsw),
TP_ARGS(vcpu, gpid_from, gmmid_from, gpid_to, gmmid_to, next_gsw),
TP_STRUCT__entry(
__field( int, vcpu_id )
__field( int, gpid_from )
__field( int, gmmid_from )
__field( int, gpid_to )
__field( int, gmmid_to )
__field( e2k_addr_t, top )
__field_struct( e2k_usd_lo_t, usd_lo )
__field_struct( e2k_usd_hi_t, usd_hi )
__field_struct( e2k_psp_lo_t, psp_lo )
__field_struct( e2k_psp_hi_t, psp_hi )
__field_struct( e2k_pcsp_lo_t, pcsp_lo )
__field_struct( e2k_pcsp_hi_t, pcsp_hi )
__field( pgprotval_t, u_pptb )
__field( gva_t, u_vptb )
__field( hpa_t, root )
__field( u64, mmu_pptb )
__field( u64, mmu_pid )
__field( u64, ctxt_pid )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->gpid_from = gpid_from;
__entry->gmmid_from = gmmid_from;
__entry->gpid_to = gpid_to;
__entry->gmmid_to = gmmid_to;
__entry->top = next_gsw->top;
__entry->usd_lo = next_gsw->usd_lo;
__entry->usd_hi = next_gsw->usd_hi;
__entry->psp_lo = next_gsw->psp_lo;
__entry->psp_hi = next_gsw->psp_hi;
__entry->pcsp_lo = next_gsw->pcsp_lo;
__entry->pcsp_hi = next_gsw->pcsp_hi;
__entry->u_pptb = vcpu->arch.mmu.get_vcpu_u_pptb(vcpu);
__entry->u_vptb = vcpu->arch.mmu.get_vcpu_sh_u_vptb(vcpu);
__entry->root = kvm_get_space_type_spt_u_root(vcpu);
__entry->mmu_pptb = get_mmu_u_pptb_reg();
__entry->mmu_pid = get_mmu_pid_reg();
__entry->ctxt_pid = kvm_get_guest_mmu_pid(vcpu);
),
TP_printk("VCPU #%d: switch from gpid #%d gmm #%d to gpid #%d gmm #%d\n"
" USD: base 0x%llx size 0x%x top at 0x%lx\n"
" PSP: base 0x%llx ind 0x%x size 0x%x\n"
" PCSP: base 0x%llx ind 0x%x size 0x%x\n"
" MMU: u_pptb 0x%lx u_vptb 0x%lx sh_pptb 0x%llx\n"
" mmu_upptb 0x%llx mmu_pid 0x%llx ctxt pid 0x%llx",
__entry->vcpu_id,
__entry->gpid_from, __entry->gmmid_from,
__entry->gpid_to, __entry->gmmid_to,
__entry->usd_lo.USD_lo_base,
__entry->usd_hi.USD_hi_size,
__entry->top,
__entry->psp_lo.PSP_lo_base,
__entry->psp_hi.PSP_hi_ind,
__entry->psp_hi.PSP_hi_size,
__entry->pcsp_lo.PCSP_lo_base,
__entry->pcsp_hi.PCSP_hi_ind,
__entry->pcsp_hi.PCSP_hi_size,
__entry->u_pptb, __entry->u_vptb, __entry->root,
__entry->mmu_pptb, __entry->mmu_pid, __entry->ctxt_pid
)
);
TRACE_EVENT(
vcpu_put,

View File

@ -368,6 +368,42 @@ TRACE_EVENT(
TP_printk("pmirr#%d val 0x%llx\n", __entry->pmirr, __entry->val)
);
/*
 * save_pnmirr - records a 32-bit value, printed as "pnmirr val 0x...".
 * @val - value to record
 * NOTE(review): presumably emitted when the PNMIRR register value is
 * saved - confirm against the call site.
 */
TRACE_EVENT(
save_pnmirr,
TP_PROTO(u32 val),
TP_ARGS(val),
TP_STRUCT__entry(
__field( u32, val )
),
TP_fast_assign(
__entry->val = val;
),
TP_printk("pnmirr val 0x%x\n", __entry->val)
);
/*
 * restore_pnmirr - records a 32-bit value, printed as "pnmirr val 0x...".
 * @val - value to record
 * NOTE(review): presumably emitted when the PNMIRR register value is
 * restored - confirm against the call site.
 */
TRACE_EVENT(
restore_pnmirr,
TP_PROTO(u32 val),
TP_ARGS(val),
TP_STRUCT__entry(
__field( u32, val )
),
TP_fast_assign(
__entry->val = val;
),
TP_printk("pnmirr val 0x%x\n", __entry->val)
);
TRACE_EVENT(
save_cir,

View File

@ -744,6 +744,7 @@ typedef struct kvm_sw_cpu_context {
u64 rpr_hi;
u64 tcd;
#ifdef CONFIG_CLW_ENABLE
mmu_reg_t us_cl_d;
clw_reg_t us_cl_b;
clw_reg_t us_cl_up;
@ -751,6 +752,7 @@ typedef struct kvm_sw_cpu_context {
clw_reg_t us_cl_m1;
clw_reg_t us_cl_m2;
clw_reg_t us_cl_m3;
#endif
} kvm_sw_cpu_context_t;
/*

View File

@ -39,10 +39,13 @@ enum {
CPU_HWBUG_SPURIOUS_EXC_DATA_DEBUG,
CPU_HWBUG_TLB_FLUSH_L1D,
CPU_HWBUG_GUEST_ASYNC_PM,
CPU_HWBUG_E16C_SLEEP,
CPU_HWBUG_L1I_STOPS_WORKING,
CPU_HWBUG_CLW_STALE_L1_ENTRY,
CPU_HWBUG_C3_WAIT_MA_C,
CPU_HWBUG_VIRT_SCLKM3_INTC,
CPU_HWBUG_VIRT_PSIZE_INTERCEPTION,
CPU_HWBUG_USD_ALIGNMENT,
CPU_FEAT_WC_PCI_PREFETCH,
CPU_FEAT_FLUSH_DC_IC,
CPU_FEAT_EPIC,
@ -52,7 +55,6 @@ enum {
CPU_FEAT_ISET_V3,
CPU_FEAT_ISET_V5,
CPU_FEAT_ISET_V6,
CPU_HWBUG_E16C_SLEEP,
NR_CPU_FEATURES
};
@ -298,7 +300,8 @@ CPUHAS(CPU_HWBUG_DMA_AT_APIC_ADDR,
false,
cpu == IDR_ES2_DSP_MDL);
/* #88644 - data profiling events are lost if overflow happens
* under closed NM interrupts.
* under closed NM interrupts; also DDMCR writing does not clear
* pending exc_data_debug exceptions.
* Workaround - disable data monitor profiling in kernel. */
CPUHAS(CPU_HWBUG_KERNEL_DATA_MONITOR,
!IS_ENABLED(CONFIG_CPU_ES2) && !IS_ENABLED(CONFIG_CPU_E2S) &&
@ -491,6 +494,22 @@ CPUHAS(CPU_HWBUG_VIRT_SCLKM3_INTC,
cpu == IDR_E16C_MDL && revision == 0 ||
cpu == IDR_E12C_MDL && revision == 0 ||
cpu == IDR_E2C3_MDL && revision == 0);
/* #130039 - intercepting some specific sequences of call/return/setwd
* (that change WD.psize in a specific way) does not work.
* Workaround - avoid those sequences. */
CPUHAS(CPU_HWBUG_VIRT_PSIZE_INTERCEPTION,
IS_ENABLED(CONFIG_E2K_MACHINE),
IS_ENABLED(CONFIG_CPU_E16C) || IS_ENABLED(CONFIG_CPU_E2C3),
(cpu == IDR_E16C_MDL || cpu == IDR_E2C3_MDL) && revision == 0);
/* #129848 - alignment of usd_hi write depends on current usd_lo.p
* Workaround - write usd_lo before usd_hi, while keeping 2 tact distance from sbr write.
* Valid sequences are: sbr, nop, usd.lo, usd.hi OR sbr, usd.lo, usd.hi, usd.lo */
CPUHAS(CPU_HWBUG_USD_ALIGNMENT,
IS_ENABLED(CONFIG_E2K_MACHINE) && !IS_ENABLED(CONFIG_CPU_E16C) &&
!IS_ENABLED(CONFIG_CPU_E2C3),
!IS_ENABLED(CONFIG_CPU_E12C),
cpu == IDR_E16C_MDL && revision <= 1 ||
cpu == IDR_E2C3_MDL && revision <= 1);
/* Rely on IDR instead of iset version to choose between APIC and EPIC.
* For guest we use it's own fake IDR so that we choose between APIC and
* EPIC based on what hardware guest *thinks* it's being executed on. */

View File

@ -733,6 +733,7 @@ typedef unsigned long clw_reg_t;
#ifndef __ASSEMBLY__
typedef union {
u32 half_word[2];
struct { /* structure of register */
u32 user : 1; /* [ 0: 0] */
u32 system : 1; /* [ 1: 1] */
@ -755,7 +756,10 @@ typedef union {
u64 unus9 : 4;
u64 b3 : 8;
u64 unu10 : 4;
u64 unu11 : 16;
u64 unu11 : 1;
u64 m0 : 1;
u64 m1 : 1;
u64 unu12 : 13;
};
union {
struct {

View File

@ -29,8 +29,8 @@ typedef struct { pgprotval_t pgprot; } pgprot_t;
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )

View File

@ -246,19 +246,12 @@ native_read_TIR_hi_reg(void)
NATIVE_SET_DSREG_OPEN(usd.lo, USD_lo_value)
#define NATIVE_NV_WRITE_USD_HI_REG_VALUE(USD_hi_value) \
NATIVE_SET_DSREG_OPEN(usd.hi, USD_hi_value)
#define NATIVE_NV_WRITE_USD_REG_VALUE(USD_hi_value, USD_lo_value) \
({ \
NATIVE_NV_WRITE_USD_HI_REG_VALUE(USD_hi_value); \
NATIVE_NV_WRITE_USD_LO_REG_VALUE(USD_lo_value); \
})
#define NATIVE_NV_WRITE_USD_REG(USD_hi, USD_lo) \
({ \
NATIVE_NV_WRITE_USD_REG_VALUE(USD_hi.USD_hi_half, USD_lo.USD_lo_half); \
})
#define NATIVE_NV_WRITE_USBR_USD_REG_VALUE(usbr, usd_hi, usd_lo) \
do { \
NATIVE_NV_WRITE_USBR_REG_VALUE(usbr); \
if (cpu_has(CPU_HWBUG_USD_ALIGNMENT)) \
NATIVE_NV_WRITE_USD_LO_REG_VALUE(usd_lo); \
NATIVE_NV_WRITE_USD_HI_REG_VALUE(usd_hi); \
NATIVE_NV_WRITE_USD_LO_REG_VALUE(usd_lo); \
} while (0)
@ -266,6 +259,8 @@ do { \
#define NATIVE_NV_WRITE_USBR_USD_REG(usbr, usd_hi, usd_lo) \
do { \
NATIVE_NV_WRITE_USBR_REG(usbr); \
if (cpu_has(CPU_HWBUG_USD_ALIGNMENT)) \
NATIVE_NV_WRITE_USD_LO_REG(usd_lo); \
NATIVE_NV_WRITE_USD_HI_REG(usd_hi); \
NATIVE_NV_WRITE_USD_LO_REG(usd_lo); \
} while (0)
@ -429,8 +424,10 @@ extern void native_write_SCLKM2_reg_value(unsigned long reg_value);
NATIVE_SET_SREG_CLOSED_NOEXC(dibcr, DIBCR_value, 4)
#define NATIVE_WRITE_DIBSR_REG_VALUE(DIBSR_value) \
NATIVE_SET_SREG_CLOSED_NOEXC(dibsr, DIBSR_value, 4)
/* A 6-cycle delay guarantees that all counting
 * is stopped and %dibsr is updated accordingly. */
#define NATIVE_WRITE_DIMCR_REG_VALUE(DIMCR_value) \
NATIVE_SET_DSREG_CLOSED_NOEXC(dimcr, DIMCR_value, 4)
NATIVE_SET_DSREG_CLOSED_NOEXC(dimcr, DIMCR_value, 5)
#define NATIVE_WRITE_DIBAR0_REG_VALUE(DIBAR0_value) \
NATIVE_SET_DSREG_CLOSED_NOEXC(dibar0, DIBAR0_value, 4)
#define NATIVE_WRITE_DIBAR1_REG_VALUE(DIBAR1_value) \

View File

@ -242,46 +242,44 @@ native_flush_ICACHE_all(void)
/*
* native MMU DEBUG registers access
*/
#define NATIVE_READ_MMU_DEBUG_REG(reg_mnemonic) \
NATIVE_GET_MMUREG(reg_mnemonic)
#define NATIVE_WRITE_MMU_DEBUG_REG(reg_mnemonic, reg_value) \
NATIVE_SET_MMUREG(reg_mnemonic, reg_value)
#define NATIVE_READ_DDBAR0_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddbar0)
NATIVE_GET_MMUREG(ddbar0)
#define NATIVE_READ_DDBAR1_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddbar1)
NATIVE_GET_MMUREG(ddbar1)
#define NATIVE_READ_DDBAR2_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddbar2)
NATIVE_GET_MMUREG(ddbar2)
#define NATIVE_READ_DDBAR3_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddbar3)
NATIVE_GET_MMUREG(ddbar3)
#define NATIVE_READ_DDBCR_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddbcr)
NATIVE_GET_MMUREG(ddbcr)
#define NATIVE_READ_DDBSR_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddbsr)
NATIVE_GET_MMUREG(ddbsr)
#define NATIVE_READ_DDMAR0_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddmar0)
NATIVE_GET_MMUREG(ddmar0)
#define NATIVE_READ_DDMAR1_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddmar1)
NATIVE_GET_MMUREG(ddmar1)
#define NATIVE_READ_DDMCR_REG_VALUE() \
NATIVE_READ_MMU_DEBUG_REG(ddmcr)
NATIVE_GET_MMUREG(ddmcr)
#define NATIVE_WRITE_DDBAR0_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddbar0, value)
NATIVE_SET_MMUREG(ddbar0, value)
#define NATIVE_WRITE_DDBAR1_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddbar1, value)
NATIVE_SET_MMUREG(ddbar1, value)
#define NATIVE_WRITE_DDBAR2_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddbar2, value)
NATIVE_SET_MMUREG(ddbar2, value)
#define NATIVE_WRITE_DDBAR3_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddbar3, value)
NATIVE_SET_MMUREG(ddbar3, value)
#define NATIVE_WRITE_DDBCR_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddbcr, value)
NATIVE_SET_MMUREG(ddbcr, value)
#define NATIVE_WRITE_DDBSR_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddbsr, value)
NATIVE_SET_MMUREG(ddbsr, value)
#define NATIVE_WRITE_DDMAR0_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddmar0, value)
NATIVE_SET_MMUREG(ddmar0, value)
#define NATIVE_WRITE_DDMAR1_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddmar1, value)
NATIVE_SET_MMUREG(ddmar1, value)
/* 4 cycles delay guarantees that all counting
* is stopped and %ddbsr is updated accordingly. */
#define NATIVE_WRITE_DDMCR_REG_VALUE(value) \
NATIVE_WRITE_MMU_DEBUG_REG(ddmcr, value)
NATIVE_SET_MMUREG_CLOSED(ddmcr, value, 3)
#define NATIVE_READ_DDBAR0_REG() \
NATIVE_READ_DDBAR0_REG_VALUE()

View File

@ -1,17 +1,16 @@
#ifndef _ASM_E2K_PERF_EVENT_H
#define _ASM_E2K_PERF_EVENT_H
#pragma once
#include <linux/percpu.h>
#include <asm/cpu_regs.h>
#include <asm/perf_event_types.h>
#include <asm/process.h>
#include <asm/regs_state.h>
/* Both helpers have empty bodies here, i.e. calling them does nothing. */
static inline void set_perf_event_pending(void) {}
static inline void clear_perf_event_pending(void) {}
#define PERF_EVENT_INDEX_OFFSET 0
int perf_data_overflow_handle(struct pt_regs *);
int perf_instr_overflow_handle(struct pt_regs *);
void perf_data_overflow_handle(struct pt_regs *);
void perf_instr_overflow_handle(struct pt_regs *);
void dimtp_overflow(struct perf_event *event);
#define perf_arch_fetch_caller_regs perf_arch_fetch_caller_regs
@ -22,35 +21,81 @@ static __always_inline void perf_arch_fetch_caller_regs(struct pt_regs *regs,
WARN_ON_ONCE(instruction_pointer(regs) != ip);
}
#define ARCH_PERFMON_EVENT_MASK 0xffff
#define ARCH_PERFMON_OS (1 << 16)
#define ARCH_PERFMON_USR (1 << 17)
#define ARCH_PERFMON_ENABLED (1 << 18)
/*
 * dimcr_pause - clear the user and system fields of both %dimcr entries.
 *
 * Returns the %dimcr value as it was read before the fields were cleared,
 * so that the caller can later pass it to dimcr_continue().
 */
static inline e2k_dimcr_t dimcr_pause(void)
{
e2k_dimcr_t dimcr, dimcr_old;
/*
* Stop counting for more precise group counting and also
* to avoid races when one counter overflows while another
* is being handled.
*
* Writing %dimcr also clears other pending exc_instr_debug
*/
dimcr = READ_DIMCR_REG();
/* Remember the unmodified value; it is the return value. */
dimcr_old = dimcr;
AS(dimcr)[0].user = 0;
AS(dimcr)[0].system = 0;
AS(dimcr)[1].user = 0;
AS(dimcr)[1].system = 0;
WRITE_DIMCR_REG(dimcr);
/*
 * NOTE(review): a per-CPU declaration in the middle of this function body
 * looks like a leftover of the diff rendering - confirm against the
 * real header.
 */
DECLARE_PER_CPU(struct perf_event * [4], cpu_events);
return dimcr_old;
}
/*
* Bitmask for perf_monitors_used
*
* DIM0 has all counters from DIM1 and some more. So events for
* DIM1 are marked with DIM0_DIM1, and the actual used monitor
* will be determined at runtime.
*
* The enumerators below are bit indices; the macros without the leading
* underscore are the corresponding one-bit masks.
*/
enum {
_DDM0 = 0,
_DDM1,
_DIM0,
_DIM1,
_DDM0_DDM1,
_DIM0_DIM1,
/* Number of bit indices defined above */
MAX_HW_MONITORS
};
/* One-bit masks built from the bit indices above */
#define DDM0 (1 << _DDM0)
#define DDM1 (1 << _DDM1)
#define DIM0 (1 << _DIM0)
#define DIM1 (1 << _DIM1)
#define DDM0_DDM1 (1 << _DDM0_DDM1)
#define DIM0_DIM1 (1 << _DIM0_DIM1)
/*
 * ddmcr_pause - clear the user and system fields of both %ddmcr entries.
 *
 * Counting is stopped for more precise group counting and also to avoid
 * races when one counter overflows while another is being handled.
 * Writing %ddmcr also clears other pending exc_data_debug.
 *
 * Returns the %ddmcr value as it was read before the fields were cleared.
 */
static inline e2k_ddmcr_t ddmcr_pause(void)
{
	const e2k_ddmcr_t saved = READ_DDMCR_REG();
	e2k_ddmcr_t stopped = saved;
	int mon;

	/* Same treatment for entry 0 and entry 1 */
	for (mon = 0; mon < 2; mon++) {
		AS(stopped)[mon].user = 0;
		AS(stopped)[mon].system = 0;
	}

	WRITE_DDMCR_REG(stopped);

	return saved;
}
#ifdef CONFIG_PERF_EVENTS
extern void dimcr_continue(e2k_dimcr_t dimcr_old);
extern void ddmcr_continue(e2k_ddmcr_t ddmcr_old);
#else
/*
 * dimcr_continue - restart counting.
 * @dimcr_old: %dimcr value whose user/system fields are put back
 *
 * Only the user and system fields of both entries are taken from
 * @dimcr_old; everything else keeps the value currently read from %dimcr.
 */
static inline void dimcr_continue(e2k_dimcr_t dimcr_old)
{
	e2k_dimcr_t cur = READ_DIMCR_REG();
	int mon;

	/* Same treatment for entry 0 and entry 1 */
	for (mon = 0; mon < 2; mon++) {
		AS(cur)[mon].user = AS(dimcr_old)[mon].user;
		AS(cur)[mon].system = AS(dimcr_old)[mon].system;
	}

	WRITE_DIMCR_REG(cur);
}
/*
 * ddmcr_continue - restart counting.
 * @ddmcr_old: %ddmcr value whose user/system fields are put back
 *
 * Only the user and system fields of both entries are taken from
 * @ddmcr_old; everything else keeps the value currently read from %ddmcr.
 */
static inline void ddmcr_continue(e2k_ddmcr_t ddmcr_old)
{
	e2k_ddmcr_t cur = READ_DDMCR_REG();
	int mon;

	/* Same treatment for entry 0 and entry 1 */
	for (mon = 0; mon < 2; mon++) {
		AS(cur)[mon].user = AS(ddmcr_old)[mon].user;
		AS(cur)[mon].system = AS(ddmcr_old)[mon].system;
	}

	WRITE_DDMCR_REG(cur);
}
#endif

View File

@ -0,0 +1,46 @@
#pragma once
#include <linux/percpu.h>
#include <asm/ptrace.h>
#define PERF_EVENT_INDEX_OFFSET 0
#define ARCH_PERFMON_EVENT_MASK 0xffff
#define ARCH_PERFMON_OS (1 << 16)
#define ARCH_PERFMON_USR (1 << 17)
#define ARCH_PERFMON_ENABLED (1 << 18)
DECLARE_PER_CPU(struct perf_event * [4], cpu_events);
/*
 * Accessors for the per-CPU perf_monitors_used and perf_bps_used bytes.
 * Without CONFIG_PERF_EVENTS the variables are not declared and both
 * accessors evaluate to the constant 0.
 */
#ifdef CONFIG_PERF_EVENTS
DECLARE_PER_CPU(u8, perf_monitors_used);
DECLARE_PER_CPU(u8, perf_bps_used);
# define perf_read_monitors_used() __this_cpu_read(perf_monitors_used)
# define perf_read_bps_used() __this_cpu_read(perf_bps_used)
#else /* ! CONFIG_PERF_EVENTS */
# define perf_read_monitors_used() 0
# define perf_read_bps_used() 0
#endif /* CONFIG_PERF_EVENTS */
/*
* Bitmask for perf_monitors_used
*
* DIM0 has all counters from DIM1 and some more. So events for
* DIM1 are marked with DIM0_DIM1, and the actual used monitor
* will be determined at runtime.
*
* The enumerators below are bit indices; the macros without the leading
* underscore are the corresponding one-bit masks.
*/
enum {
_DDM0 = 0,
_DDM1,
_DIM0,
_DIM1,
_DDM0_DDM1,
_DIM0_DIM1,
/* Number of bit indices defined above */
MAX_HW_MONITORS
};
/* One-bit masks built from the bit indices above */
#define DDM0 (1 << _DDM0)
#define DDM1 (1 << _DDM1)
#define DIM0 (1 << _DIM0)
#define DIM1 (1 << _DIM1)
#define DDM0_DDM1 (1 << _DDM0_DDM1)
#define DIM0_DIM1 (1 << _DIM0_DIM1)

View File

@ -82,7 +82,7 @@ struct e2k_uncore {
struct e2k_uncore_valid_events *valid_events;
int (*validate_event)(struct e2k_uncore *, struct hw_perf_event *);
u64 (*get_event)(struct hw_perf_event *);
int (*add_event)(struct perf_event *);
int (*add_event)(struct e2k_uncore *, struct perf_event *);
struct e2k_uncore_reg_ops *reg_ops;
struct pmu pmu;

View File

@ -27,24 +27,6 @@
#include <asm/e2k_debug.h>
#include <asm/kvm/uaccess.h> /* host mode support */
/*
 * Debug switches for guest user stacks and syscall injection.
 *
 * DebugUST() prints through pr_info() only when debug_guest_ust is true.
 * With both DEBUG_PV_*_MODE switches set to 0, as they are here,
 * debug_guest_ust is the constant false; otherwise it is an external
 * variable defined elsewhere.
 */
#undef DEBUG_PV_UST_MODE
#undef DebugUST
#define DEBUG_PV_UST_MODE 0 /* guest user stacks debug */
#define DebugUST(fmt, args...) \
({ \
if (debug_guest_ust) \
pr_info("%s(): " fmt, __func__, ##args); \
})
#undef DEBUG_PV_SYSCALL_MODE
#define DEBUG_PV_SYSCALL_MODE 0 /* syscall injection debugging */
#if DEBUG_PV_UST_MODE || DEBUG_PV_SYSCALL_MODE
extern bool debug_guest_ust;
#else
#define debug_guest_ust false
#endif /* DEBUG_PV_UST_MODE || DEBUG_PV_SYSCALL_MODE */
#undef DEBUG_SS_MODE
#undef DebugSS
#define DEBUG_SS_MODE 0 /* stack switching */
@ -818,12 +800,10 @@ static inline void free_virt_task_struct(struct task_struct *task)
{
/* virtual machines is not supported */
}
#else /* CONFIG_VIRTUALIZATION */
#elif defined(CONFIG_KVM_HOST_MODE)
/* It is native host kernel with virtualization support */
/* or paravirtualized host and guest */
/* or native guest kernel
#include <asm/kvm/process.h>
*/
#endif /* ! CONFIG_VIRTUALIZATION */
/*
@ -852,119 +832,6 @@ native_preserve_user_hw_stacks_to_copy(e2k_stacks_t *u_stacks,
u_stacks->pcshtp = cur_stacks->pcshtp;
}
/*
 * native_kernel_hw_stack_frames_copy - tagged copy of hardware stack frames
 * @dst: destination
 * @src: source
 * @size: number of bytes to copy
 *
 * Whole 64-byte chunks (128-byte chunks when CPU_FEAT_QPREG is absent) are
 * copied in a loop; after that one 64-byte and/or one 32-byte piece is
 * copied if the corresponding bit of @size is set. A remainder smaller
 * than 32 bytes is not copied.
 */
static __always_inline void
native_kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
void *dst_tail;
const void *src_tail;
u64 copied;
int i;
/*
* Kernel does not use FP registers so do not copy them.
* This only applies to CPUs before V5 instruction set
* (since V5 FP registers become general-purpose QP registers).
*/
if (cpu_has(CPU_FEAT_QPREG)) {
#pragma loop count (10)
for (i = 0; i < size / 64; i++)
E2K_TAGGED_MEMMOVE_64(&dst[8 * i], &src[8 * i]);
/* The tail pointers start right after the last whole chunk */
copied = round_down(size, 64);
dst_tail = (void *) dst + copied;
src_tail = (void *) src + copied;
} else {
#pragma loop count (5)
for (i = 0; i < size / 128; i++)
E2K_TAGGED_MEMMOVE_128_RF_V2(&dst[16 * i],
&src[16 * i]);
copied = round_down(size, 128);
dst_tail = (void *) dst + copied;
src_tail = (void *) src + copied;
/* A 64-byte piece can remain only with 128-byte chunks */
if (size & 64) {
E2K_TAGGED_MEMMOVE_64(dst_tail, src_tail);
dst_tail += 64;
src_tail += 64;
}
}
/* Final 32-byte piece, common to both branches */
if (size & 32)
E2K_TAGGED_MEMMOVE_32(dst_tail, src_tail);
}
/*
 * native_collapse_kernel_pcs - move the rest of the kernel chain stack down
 * @dst: where the data is moved to
 * @src: where the data is moved from
 * @spilled_size: number of bytes the chain stack index is lowered by
 *
 * After NATIVE_FLUSHC, (PCSP_hi.ind - @spilled_size) bytes are copied from
 * @src to @dst in 32-byte steps, then PCSP_hi.ind is decreased by
 * @spilled_size and written back.
 */
static __always_inline void
native_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
e2k_pcsp_hi_t k_pcsp_hi;
u64 size;
int i;
DebugUST("current host chain stack index 0x%x, PCSHTP 0x%llx\n",
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
NATIVE_READ_PCSHTP_REG_SVALUE());
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
/* Bytes that stay in the stack and therefore have to be moved */
size = k_pcsp_hi.PCSP_hi_ind - spilled_size;
/* The (s64) cast catches spilled_size being larger than the index */
BUG_ON(!IS_ALIGNED(size, ALIGN_PCSTACK_TOP_SIZE) || (s64) size < 0);
#pragma loop count (2)
for (i = 0; i < size / 32; i++) {
u64 v0, v1, v2, v3;
/* All four words are loaded before any of them is stored */
v0 = src[4 * i];
v1 = src[4 * i + 1];
v2 = src[4 * i + 2];
v3 = src[4 * i + 3];
dst[4 * i] = v0;
dst[4 * i + 1] = v1;
dst[4 * i + 2] = v2;
dst[4 * i + 3] = v3;
}
k_pcsp_hi.PCSP_hi_ind -= spilled_size;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
DebugUST("move spilled chain part from host top %px to "
"bottom %px, size 0x%llx\n",
src, dst, size);
DebugUST("host kernel chain stack index is now 0x%x, "
"guest user PCSHTP 0x%llx\n",
k_pcsp_hi.PCSP_hi_ind, spilled_size);
}
/*
 * native_collapse_kernel_ps - move the rest of the kernel procedure stack down
 * @dst: where the data is moved to
 * @src: where the data is moved from
 * @spilled_size: number of bytes the procedure stack index is lowered by
 *
 * After NATIVE_FLUSHR, (PSP_hi.ind - @spilled_size) bytes are copied from
 * @src to @dst with native_kernel_hw_stack_frames_copy(), then PSP_hi.ind
 * is decreased by @spilled_size and written back.
 */
static __always_inline void
native_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
e2k_psp_hi_t k_psp_hi;
u64 size;
DebugUST("current host procedure stack index 0x%x, PSHTP 0x%x\n",
NATIVE_NV_READ_PSP_HI_REG().PSP_hi_ind,
NATIVE_NV_READ_PSHTP_REG().PSHTP_ind);
NATIVE_FLUSHR;
k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
/* Bytes that stay in the stack and therefore have to be moved */
size = k_psp_hi.PSP_hi_ind - spilled_size;
/* The (s64) cast catches spilled_size being larger than the index */
BUG_ON(!IS_ALIGNED(size, ALIGN_PSTACK_TOP_SIZE) || (s64) size < 0);
prefetchw_range(src, size);
native_kernel_hw_stack_frames_copy(dst, src, size);
k_psp_hi.PSP_hi_ind -= spilled_size;
NATIVE_NV_NOIRQ_WRITE_PSP_HI_REG(k_psp_hi);
DebugUST("move spilled procedure part from host top %px to "
"bottom %px, size 0x%llx\n",
src, dst, size);
DebugUST("host kernel procedure stack index is now 0x%x, "
"guest user PSHTP 0x%llx\n",
k_psp_hi.PSP_hi_ind, spilled_size);
}
/**
* find_in_u_pcs_list - find frame offset from old_u_pcs_list
* @frame - frame to search
@ -1093,22 +960,6 @@ preserve_user_hw_stacks_to_copy(e2k_stacks_t *u_stacks,
native_preserve_user_hw_stacks_to_copy(u_stacks, cur_stacks);
}
/* Forwards all arguments unchanged to native_kernel_hw_stack_frames_copy(). */
static __always_inline void
kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
native_kernel_hw_stack_frames_copy(dst, src, size);
}
/* Forwards all arguments unchanged to native_collapse_kernel_pcs(). */
static __always_inline void
collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
native_collapse_kernel_pcs(dst, src, spilled_size);
}
/* Forwards all arguments unchanged to native_collapse_kernel_ps(). */
static __always_inline void
collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
native_collapse_kernel_ps(dst, src, spilled_size);
}
#if !defined(CONFIG_VIRTUALIZATION)
/* native kernel without virtualization support */
#define do_map_user_hard_stack_to_kernel(node, kstart, ubase, size) \
@ -1164,104 +1015,6 @@ copy_spilled_user_stacks(struct e2k_stacks *child_stacks,
#error "Undefined virtualization mode"
#endif /* CONFIG_PARAVIRT_GUEST */
/*
* Copy hardware stack from user to *current* kernel stack.
* One has to be careful to avoid hardware FILL of this stack.
*
* @dst: destination in the current kernel hardware stack
* @src: user source address
* @size: number of bytes to copy
* @regs: pt_regs, used for the intercept emulation mode check and passed
*        to the copy routine
* @chain: true - flush with E2K_FLUSHC, false - flush with E2K_FLUSHR
*
* Returns -EFAULT if the user range check fails or the copy faults,
* 0 otherwise.
*/
static inline int __copy_user_to_current_hw_stack(void *dst, void __user *src,
unsigned long size, const pt_regs_t *regs, bool chain)
{
unsigned long min_flt, maj_flt, ts_flag;
/* The range check is skipped in intercept emulation mode */
if (likely(!host_test_intc_emul_mode(regs))) {
if (!__range_ok((unsigned long __force) src, size,
PAGE_OFFSET))
return -EFAULT;
}
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
/*
* Every page fault here has a chance of FILL'ing the frame
* that is being copied, in which case we repeat the copy.
*/
do {
/* Snapshot the fault counters; a change means a fault happened */
min_flt = READ_ONCE(current->min_flt);
maj_flt = READ_ONCE(current->maj_flt);
if (chain)
E2K_FLUSHC;
else
E2K_FLUSHR;
SET_USR_PFAULT("$.recovery_memcpy_fault");
fast_tagged_memory_copy_from_user(dst, src, size, regs,
TAGGED_MEM_STORE_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
TAGGED_MEM_LOAD_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
true);
if (RESTORE_USR_PFAULT) {
/* Restore the thread flag on the error path as well */
clear_ts_flag(ts_flag);
return -EFAULT;
}
} while (unlikely(min_flt != READ_ONCE(current->min_flt) ||
maj_flt != READ_ONCE(current->maj_flt)));
clear_ts_flag(ts_flag);
return 0;
}
/*
 * copy_user_to_current_hw_stack - __copy_user_to_current_hw_stack() executed
 * between raw_all_irq_save() and raw_all_irq_restore().
 *
 * Arguments and return value are those of __copy_user_to_current_hw_stack().
 */
static inline int copy_user_to_current_hw_stack(void *dst, void __user *src,
		unsigned long size, pt_regs_t *regs, bool chain)
{
	unsigned long irq_state;
	int rc;

	raw_all_irq_save(irq_state);
	rc = __copy_user_to_current_hw_stack(dst, src, size, regs, chain);
	raw_all_irq_restore(irq_state);

	return rc;
}
/*
 * copy_e2k_stack_from_user - copy @size bytes from user @src to @dst using
 * host_copy_from_user_with_tags().
 *
 * The user range is checked first unless @regs is in intercept emulation
 * mode. TS_KERNEL_SYSCALL is set for the duration of the copy.
 *
 * Returns 0 on success, -EFAULT if the range check or the copy fails.
 */
static inline int copy_e2k_stack_from_user(void *dst, void __user *src,
		unsigned long size, pt_regs_t *regs)
{
	unsigned long saved_flag;
	int failed;

	if (likely(!host_test_intc_emul_mode(regs)) &&
	    !__range_ok((unsigned long __force) src, size, PAGE_OFFSET))
		return -EFAULT;

	saved_flag = set_ts_flag(TS_KERNEL_SYSCALL);
	failed = host_copy_from_user_with_tags(dst, src, size, regs);
	clear_ts_flag(saved_flag);

	if (failed)
		return -EFAULT;

	return 0;
}
/*
 * copy_e2k_stack_to_user - copy @size bytes from @src to user @dst using
 * host_copy_to_user_with_tags().
 *
 * The user range is checked first unless @regs is in intercept emulation
 * mode. TS_KERNEL_SYSCALL is set for the duration of the copy.
 *
 * Returns 0 on success, -EFAULT if the range check or the copy fails.
 */
static inline int copy_e2k_stack_to_user(void __user *dst, void *src,
		unsigned long size, pt_regs_t *regs)
{
	unsigned long saved_flag;
	int failed;

	if (likely(!host_test_intc_emul_mode(regs)) &&
	    !__range_ok((unsigned long __force) dst, size, PAGE_OFFSET))
		return -EFAULT;

	saved_flag = set_ts_flag(TS_KERNEL_SYSCALL);
	failed = host_copy_to_user_with_tags(dst, src, size, regs);
	clear_ts_flag(saved_flag);

	if (failed)
		return -EFAULT;

	return 0;
}
DECLARE_PER_CPU(void *, reserve_hw_stacks);
static inline int on_reserve_stacks(void)
{
@ -1278,134 +1031,6 @@ static inline int on_reserve_stacks(void)
KERNEL_PC_STACK_SIZE;
}
/*
 * user_hw_stack_frames_copy - tagged copy of hardware stack data to user
 * @dst: user destination
 * @src: kernel source
 * @copy_size: number of bytes to copy
 * @regs: pt_regs passed on to the copy routine
 * @hw_stack_ind: current index of the kernel hardware stack being copied
 * @is_pcsp: true for the chain stack, false for the procedure stack
 *
 * Returns 0 on success, -EFAULT if the copy faulted.
 */
static __always_inline int
user_hw_stack_frames_copy(void __user *dst, void *src, unsigned long copy_size,
const pt_regs_t *regs, unsigned long hw_stack_ind, bool is_pcsp)
{
unsigned long ts_flag;
/* Flush only when the stack index is smaller than the amount to copy */
if (unlikely(hw_stack_ind < copy_size)) {
unsigned long flags;
raw_all_irq_save(flags);
if (is_pcsp) {
E2K_FLUSHC;
} else {
E2K_FLUSHR;
}
raw_all_irq_restore(flags);
}
SET_USR_PFAULT("$.recovery_memcpy_fault");
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
fast_tagged_memory_copy_to_user(dst, src, copy_size, regs,
TAGGED_MEM_STORE_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
TAGGED_MEM_LOAD_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT, true);
clear_ts_flag(ts_flag);
/* Non-zero RESTORE_USR_PFAULT is treated as a failed copy */
if (RESTORE_USR_PFAULT) {
pr_err("process %s (%d) %s stack could not be copied "
"from %px to %px size 0x%lx (out of memory?)\n",
current->comm, current->pid,
(is_pcsp) ? "chain" : "procedure",
src, dst, copy_size);
return -EFAULT;
}
DebugUST("copying guest %s stack spilled to host from %px "
"to guest kernel stack from %px, size 0x%lx\n",
(is_pcsp) ? "chain" : "procedure", src, dst, copy_size);
return 0;
}
/*
 * user_crs_frames_copy - write one chain frame to user space
 * @u_frame: user destination
 * @regs: pt_regs passed on to host_copy_to_user()
 * @crs: frame to copy, sizeof(*crs) bytes
 *
 * TS_KERNEL_SYSCALL is set for the duration of the copy.
 *
 * Returns 0 on success, -EFAULT if host_copy_to_user() reports a failure.
 */
static __always_inline int
user_crs_frames_copy(e2k_mem_crs_t __user *u_frame, pt_regs_t *regs,
		e2k_mem_crs_t *crs)
{
	unsigned long saved_flag;
	int failed;

	saved_flag = set_ts_flag(TS_KERNEL_SYSCALL);
	failed = host_copy_to_user(u_frame, crs, sizeof(*crs), regs);
	clear_ts_flag(saved_flag);

	return unlikely(failed) ? -EFAULT : 0;
}
/*
 * get_wsz - return the size field of the WD register shifted right by 4.
 * @from: not used by this implementation
 */
static __always_inline u64 get_wsz(enum restore_caller from)
{
return NATIVE_READ_WD_REG().size >> 4;
}
/*
 * user_psp_stack_copy - copy the bottom of the kernel procedure stack to
 * the user procedure stack.
 *
 * The user destination is (user base + user index - @u_pshtp_size), the
 * source is the kernel procedure stack base, and @copy_size bytes are
 * copied with user_hw_stack_frames_copy().
 *
 * Returns the result of user_hw_stack_frames_copy().
 */
static __always_inline int user_psp_stack_copy(e2k_psp_lo_t u_psp_lo,
		e2k_psp_hi_t u_psp_hi, s64 u_pshtp_size,
		e2k_psp_lo_t k_psp_lo, e2k_psp_hi_t k_psp_hi,
		unsigned long copy_size, const pt_regs_t *regs)
{
	void __user *user_dst = (void __user *) (AS(u_psp_lo).base +
			AS(u_psp_hi).ind - u_pshtp_size);
	void *kernel_src = (void *) AS(k_psp_lo).base;

	return user_hw_stack_frames_copy(user_dst, kernel_src, copy_size,
			regs, k_psp_hi.PSP_hi_ind, false);
}
/*
 * user_pcsp_stack_copy - copy the bottom of the kernel chain stack to the
 * user chain stack.
 *
 * The user destination is (user base + user index - @u_pcshtp_size), the
 * source is the kernel chain stack base, and @copy_size bytes are copied
 * with user_hw_stack_frames_copy().
 *
 * Returns the result of user_hw_stack_frames_copy().
 */
static __always_inline int user_pcsp_stack_copy(e2k_pcsp_lo_t u_pcsp_lo,
		e2k_pcsp_hi_t u_pcsp_hi, s64 u_pcshtp_size,
		e2k_pcsp_lo_t k_pcsp_lo, e2k_pcsp_hi_t k_pcsp_hi,
		unsigned long copy_size, const pt_regs_t *regs)
{
	void __user *user_dst = (void __user *)(AS(u_pcsp_lo).base +
			AS(u_pcsp_hi).ind - u_pcshtp_size);
	void *kernel_src = (void *) AS(k_pcsp_lo).base;

	return user_hw_stack_frames_copy(user_dst, kernel_src, copy_size,
			regs, k_pcsp_hi.PCSP_hi_ind, true);
}
/*
 * get_ps_clear_size - E2K_MAXSR minus the current window and the PSHTP part
 * @cur_window_q: size of the current window
 * @pshtp: PSHTP value; its Q index is taken with GET_PSHTP_Q_INDEX()
 *
 * The PSHTP Q index is first limited to (E2K_MAXSR - @cur_window_q), so
 * the subtraction in the return statement cannot go below zero because of
 * the PSHTP part.
 */
static __always_inline u64 get_ps_clear_size(u64 cur_window_q,
e2k_pshtp_t pshtp)
{
s64 u_pshtp_size_q;
u_pshtp_size_q = GET_PSHTP_Q_INDEX(pshtp);
/*
 * NOTE(review): this compares an s64 with an expression involving the
 * u64 cur_window_q - confirm the intended signedness of the comparison.
 */
if (u_pshtp_size_q > E2K_MAXSR - cur_window_q)
u_pshtp_size_q = E2K_MAXSR - cur_window_q;
return E2K_MAXSR - (cur_window_q + u_pshtp_size_q);
}
/*
 * get_ps_copy_size - @u_pshtp_size minus the room left beside the current
 * window, i.e. (E2K_MAXSR - @cur_window_q) * EXT_4_NR_SZ.
 *
 * The result is signed; native_user_hw_stacks_copy() copies only when it
 * is positive.
 */
static __always_inline s64 get_ps_copy_size(u64 cur_window_q, s64 u_pshtp_size)
{
return u_pshtp_size - (E2K_MAXSR - cur_window_q) * EXT_4_NR_SZ;
}
/*
 * E2K_CF_MAX_FILL is a constant expression when
 * CONFIG_CPU_HAS_FILL_INSTRUCTION is set; otherwise it reads the external
 * variable cf_max_fill_return.
 */
#ifdef CONFIG_CPU_HAS_FILL_INSTRUCTION
# define E2K_CF_MAX_FILL (E2K_CF_MAX_FILL_FILLC_q * 0x10)
#else
extern int cf_max_fill_return;
# define E2K_CF_MAX_FILL cf_max_fill_return
#endif
/*
 * get_pcs_copy_size - @u_pcshtp_size minus E2K_CF_MAX_FILL.
 *
 * The result is signed; native_user_hw_stacks_copy() copies only when it
 * is positive.
 */
static __always_inline s64 get_pcs_copy_size(s64 u_pcshtp_size)
{
/* Before v6 it was possible to fill no more than 16 registers.
* Since E2K_MAXCR_q is much bigger than 16 we can be sure that
* there is enough space in CF for the FILL, so there is no
* need to take into account space taken by current window. */
return u_pcshtp_size - E2K_CF_MAX_FILL;
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* This function is used to fixup ret_stack, so make sure it itself
* does not rely on correct values in ret_stack by using "notrace". */
@ -1427,352 +1052,14 @@ static inline void apply_graph_tracer_delta(unsigned long delta)
}
#endif
/**
* native_user_hw_stacks_copy - copy user hardware stacks that have been
* SPILLed to kernel back to user space
* @stacks - saved user stack registers
* @regs - pt_regs pointer; regs->trap is passed to the stack bounds handlers
* @cur_window_q - size of current window in procedure stack,
* needed only if @copy_full is not set
* @copy_full - set if want to copy _all_ of SPILLed stacks
*
* This does not update stacks->pshtp and stacks->pcshtp. Main reason is
* signals: if a signal arrives after copying then it must see a coherent
* state where saved stacks->pshtp and stacks->pcshtp values show how much
* data from user space is spilled to kernel space.
*
* Returns 0 if nothing had to be copied or everything was copied, otherwise
* the error returned by the stack bounds handler or by the copy routine.
*/
static __always_inline int
native_user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
trap_pt_regs_t *trap = regs->trap;
e2k_psp_lo_t u_psp_lo = stacks->psp_lo,
k_psp_lo = current_thread_info()->k_psp_lo;
e2k_psp_hi_t u_psp_hi = stacks->psp_hi;
e2k_pcsp_lo_t u_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t u_pcsp_hi = stacks->pcsp_hi;
s64 u_pshtp_size, u_pcshtp_size, ps_copy_size, pcs_copy_size;
int ret;
u_pshtp_size = GET_PSHTP_MEM_INDEX(stacks->pshtp);
u_pcshtp_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
/*
* Copy user's part from kernel stacks into user stacks
* Update user's stack registers
*/
if (copy_full) {
pcs_copy_size = u_pcshtp_size;
ps_copy_size = u_pshtp_size;
} else {
/* Partial copy: only what get_*_copy_size() reports as excess */
pcs_copy_size = get_pcs_copy_size(u_pcshtp_size);
ps_copy_size = get_ps_copy_size(cur_window_q, u_pshtp_size);
/* Make sure there is enough space in CF for the FILL */
BUG_ON((E2K_MAXCR_q - 4) * 16 < E2K_CF_MAX_FILL);
}
/* Sizes are signed: zero or negative means nothing to copy */
if (likely(pcs_copy_size <= 0 && ps_copy_size <= 0))
return 0;
if (unlikely(pcs_copy_size > 0)) {
e2k_pcsp_hi_t k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
/* Since not all user data has been SPILL'ed it is possible
* that we have already overflowed user's hardware stack. */
if (unlikely(AS(u_pcsp_hi).ind > AS(u_pcsp_hi).size)) {
ret = handle_chain_stack_bounds(stacks, trap);
if (unlikely(ret)) {
pr_warning("process %s (%d) chain stack overflow (out of memory?)\n",
current->comm, current->pid);
return ret;
}
/* Re-read the registers after the bounds handler has run */
u_pcsp_lo = stacks->pcsp_lo;
u_pcsp_hi = stacks->pcsp_hi;
}
ret = user_pcsp_stack_copy(u_pcsp_lo, u_pcsp_hi, u_pcshtp_size,
k_pcsp_lo, k_pcsp_hi, pcs_copy_size, regs);
if (ret)
return ret;
}
if (unlikely(ps_copy_size > 0)) {
e2k_psp_hi_t k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
/* Since not all user data has been SPILL'ed it is possible
* that we have already overflowed user's hardware stack. */
if (unlikely(AS(u_psp_hi).ind > AS(u_psp_hi).size)) {
ret = handle_proc_stack_bounds(stacks, trap);
if (unlikely(ret)) {
pr_warning("process %s (%d) procedure stack overflow (out of memory?)\n",
current->comm, current->pid);
return ret;
}
/* Re-read the registers after the bounds handler has run */
u_psp_lo = stacks->psp_lo;
u_psp_hi = stacks->psp_hi;
}
ret = user_psp_stack_copy(u_psp_lo, u_psp_hi, u_pshtp_size,
k_psp_lo, k_psp_hi, ps_copy_size, regs);
if (ret)
return ret;
}
return 0;
}
/*
 * collapse_kernel_hw_stacks - drop the user-spilled part from the bottom of
 * the kernel hardware stacks
 * @stacks: saved user stack registers; pcshtp and pshtp are updated
 *
 * The spilled sizes are taken from stacks->pcshtp and stacks->pshtp. One
 * chain frame (SZ_OF_CR) is kept, the rest of the kernel stack contents is
 * moved down to the stack base. Afterwards stacks->pcshtp is SZ_OF_CR (if
 * anything was collapsed in the chain stack) and the index of
 * stacks->pshtp is 0 (if anything was collapsed in the procedure stack).
 */
static inline void collapse_kernel_hw_stacks(struct e2k_stacks *stacks)
{
e2k_pcsp_lo_t k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_psp_lo_t k_psp_lo = current_thread_info()->k_psp_lo;
unsigned long flags, spilled_pc_size, spilled_p_size;
e2k_pshtp_t pshtp = stacks->pshtp;
u64 *dst;
const u64 *src;
spilled_pc_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
spilled_p_size = GET_PSHTP_MEM_INDEX(pshtp);
DebugUST("guest user spilled to host kernel stack part: chain 0x%lx "
"procedure 0x%lx\n",
spilled_pc_size, spilled_p_size);
/* When user tries to return from the last user frame
* we will have pcshtp = pcsp_hi.ind = 0. But situation
* with pcsp_hi.ind != 0 and pcshtp = 0 is impossible. */
if (WARN_ON_ONCE(spilled_pc_size < SZ_OF_CR &&
AS(stacks->pcsp_hi).ind != 0))
do_exit(SIGKILL);
/* Keep the last user frame (see user_hw_stacks_copy_full()) */
if (spilled_pc_size >= SZ_OF_CR) {
spilled_pc_size -= SZ_OF_CR;
DebugUST("Keep the prev user chain frame, so spilled chain "
"size is now 0x%lx\n",
spilled_pc_size);
}
raw_all_irq_save(flags);
if (spilled_pc_size) {
/* Move everything above the spilled part down to the base */
dst = (u64 *) AS(k_pcsp_lo).base;
src = (u64 *) (AS(k_pcsp_lo).base + spilled_pc_size);
collapse_kernel_pcs(dst, src, spilled_pc_size);
/* Only the kept frame is still accounted as spilled */
stacks->pcshtp = SZ_OF_CR;
apply_graph_tracer_delta(-spilled_pc_size);
}
if (spilled_p_size) {
dst = (u64 *) AS(k_psp_lo).base;
src = (u64 *) (AS(k_psp_lo).base + spilled_p_size);
collapse_kernel_ps(dst, src, spilled_p_size);
AS(pshtp).ind = 0;
stacks->pshtp = pshtp;
}
raw_all_irq_restore(flags);
}
/**
* native_user_hw_stacks_prepare - prepare user hardware stacks that have been
* SPILLed to kernel back to user space
* @stacks - saved user stack registers
* @regs - pt_regs pointer
* @cur_window_q - size of current window in procedure stack, passed on to
* native_user_hw_stacks_copy()
* @from - caller kind; FROM_PV_VCPU_MODE must not be set (BUG_ON)
* @syscall - true if called upon direct system call exit (no signal handlers)
*
* This does two things:
*
* 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
* - user signal handler had pcshtp==0x20 before return to sigreturn()
* - user context had pcshtp==0x20 before return to makecontext_trampoline()
* - chain stack underflow happened
* So it is possible in sigreturn() and traps, but not in system calls.
* If we are using the trick with return to FILL user hardware stacks then
* we must have frame in chain stack to return to. So in this case kernel's
* chain stack is moved up by one frame (0x20 bytes).
* We also fill the new frame with actual user data and update stacks->pcshtp,
* this is needed to keep the coherent state where saved stacks->pcshtp values
* shows how much data from user space has been spilled to kernel space.
*
* 2) It is not possible to always FILL all of user data that have been
* SPILLed to kernel stacks. So we manually copy the leftovers that can
* not be FILLed to user space.
* This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
* is signals: if a signal arrives after copying then it must see a coherent
* state where saved stacks->pshtp and stacks->pcshtp values show how much
* data from user space has been spilled to kernel space.
*
* Nothing is returned: on failure in step 1 SIGKILL is forced on the
* current task, on failure in step 2 the task exits with do_exit(SIGKILL).
*/
static __always_inline void native_user_hw_stacks_prepare(
struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
e2k_pcshtp_t u_pcshtp = stacks->pcshtp;
int ret;
BUG_ON(from & FROM_PV_VCPU_MODE);
/*
* 1) Make sure there is free space in kernel chain stack to return to
*/
if (!syscall && u_pcshtp == 0) {
unsigned long flags;
e2k_pcsp_lo_t u_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t u_pcsp_hi = stacks->pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *u_cframe;
e2k_mem_crs_t *k_crs;
u64 u_cbase;
/*
 * This 'ret' shadows the outer one. It stays -EINVAL when there is
 * no user frame above the stack base to copy from.
 */
int ret = -EINVAL;
raw_all_irq_save(flags);
E2K_FLUSHC;
k_pcsp_hi = READ_PCSP_HI_REG();
BUG_ON(AS(k_pcsp_hi).ind);
/* Make room for one chain frame at the kernel stack base */
AS(k_pcsp_hi).ind += SZ_OF_CR;
WRITE_PCSP_HI_REG(k_pcsp_hi);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
u_cframe = (e2k_mem_crs_t __user *) (AS(u_pcsp_lo).base +
AS(u_pcsp_hi).ind);
u_cbase = ((from & FROM_RETURN_PV_VCPU_TRAP) ||
host_test_intc_emul_mode(regs)) ?
u_pcsp_lo.PCSP_lo_base :
(u64) CURRENT_PCS_BASE();
if ((u64) u_cframe > u_cbase) {
/* Fill the new kernel frame from the user frame just below u_cframe */
ret = __copy_user_to_current_hw_stack(k_crs,
u_cframe - 1, sizeof(*k_crs), regs, true);
}
raw_all_irq_restore(flags);
/* Can happen if application returns until runs out of
* chain stack or there is no free memory for stacks.
* There is no user stack to return to - die. */
if (ret) {
E2K_LMS_HALT_OK;
SIGDEBUG_PRINT("SIGKILL. %s\n",
(ret == -EINVAL) ? "tried to return to kernel" :
"ran into Out-of-Memory on user stacks");
force_sig(SIGKILL);
return;
}
if (AS(u_pcsp_hi).ind < SZ_OF_CR) {
update_pcsp_regs(AS(u_pcsp_lo).base,
&u_pcsp_lo, &u_pcsp_hi);
stacks->pcsp_lo = u_pcsp_lo;
stacks->pcsp_hi = u_pcsp_hi;
BUG_ON(AS(u_pcsp_hi).ind < SZ_OF_CR);
}
/* One user frame is now accounted as spilled to the kernel */
u_pcshtp = SZ_OF_CR;
stacks->pcshtp = u_pcshtp;
}
/*
* 2) Copy user data that cannot be FILLed
*/
ret = native_user_hw_stacks_copy(stacks, regs, cur_window_q, false);
if (unlikely(ret))
do_exit(SIGKILL);
}
#ifndef CONFIG_VIRTUALIZATION
/* native kernel without virtualization support */
/* Forwards all arguments unchanged to native_user_hw_stacks_copy(). */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
return native_user_hw_stacks_copy(stacks, regs, cur_window_q, copy_full);
}
/* Forwards all arguments unchanged to native_user_hw_stacks_prepare(). */
static __always_inline void
host_user_hw_stacks_prepare(struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
native_user_hw_stacks_prepare(stacks, regs, cur_window_q,
from, syscall);
}
#elif defined(CONFIG_KVM_GUEST_KERNEL)
/* It is native guest kernel (without paravirtualization) */
#include <asm/kvm/guest/process.h>
#elif defined(CONFIG_PARAVIRT_GUEST)
/* It is paravirtualized kernel (host and guest) */
#include <asm/paravirt/process.h>
#elif defined(CONFIG_KVM_HOST_MODE)
/* It is host kernel with virtualization support */
#include <asm/kvm/process.h>
#else /* unknow mode */
#error "unknown virtualization mode"
#endif /* !CONFIG_VIRTUALIZATION */
/**
 * user_hw_stacks_copy_full - copy part of user stacks that was SPILLed
 *	into kernel back to user stacks.
 * @stacks - saved user stack registers
 * @regs - pt_regs pointer
 * @crs - last frame to copy
 *
 * If @crs is not NULL then one more chain frame is written to userspace
 * right after the copied stacks. Note that 'stacks->pcsp_hi.ind' is _not_
 * updated after copying since it would leave stack in inconsistent state
 * (with two copies of the same frame), this is left to the caller.
 *
 * NOTE(review): @crs is only tested against NULL; the frame that is written
 * is regs->crs. Confirm that callers always pass &regs->crs.
 *
 * Inlining this reduces the amount of memory to copy in
 * collapse_kernel_hw_stacks().
 *
 * Returns 0 on success, otherwise the error of the step that failed.
 */
static inline int user_hw_stacks_copy_full(struct e2k_stacks *stacks,
		pt_regs_t *regs, e2k_mem_crs_t *crs)
{
	e2k_mem_crs_t __user *u_frame;
	int err;

	/* Copy part of user stacks that were SPILLed into kernel stacks */
	err = user_hw_stacks_copy(stacks, regs, 0, true);
	if (unlikely(err))
		return err;

	/*
	 * Nothing to FILL so remove the resulting hole from kernel stacks.
	 *
	 * IMPORTANT: there is always at least one user frame at the top of
	 * kernel stack - the one that issued a system call (in case of an
	 * exception we uphold this rule manually, see
	 * user_hw_stacks_prepare()). We keep this ABI and _always_ leave
	 * space for one user frame, this way we can later FILL using return
	 * trick (otherwise there would be no space in chain stack for the
	 * trick).
	 */
	collapse_kernel_hw_stacks(stacks);

	if (!crs)
		return 0;

	/*
	 * Copy saved %cr registers
	 *
	 * Caller must take care of filling of resulting hole
	 * (last user frame from pcshtp == SZ_OF_CR).
	 */
	u_frame = (void __user *) (AS(stacks->pcsp_lo).base +
				   AS(stacks->pcsp_hi).ind);

	return user_crs_frames_copy(u_frame, regs, &regs->crs);
}
extern int user_hw_stacks_copy_full(struct e2k_stacks *stacks,
pt_regs_t *regs, e2k_mem_crs_t *crs);
extern e2k_addr_t get_nested_kernel_IP(pt_regs_t *regs, int n);
extern unsigned long remap_e2k_stack(unsigned long addr,
unsigned long old_size, unsigned long new_size, bool after);
extern int find_cui_by_ip(unsigned long ip);
#endif /* _E2K_PROCESS_H */

View File

@ -48,6 +48,7 @@
#include <asm/monitors.h>
#include <asm/mmu.h>
#include <asm/mmu_regs.h>
#include <asm/perf_event_types.h>
#include <asm/system.h>
#include <asm/ptrace.h>
#include <asm/p2v/boot_head.h>
@ -219,14 +220,6 @@ do { \
NATIVE_DO_SAVE_MONITOR_COUNTERS(sw_regs); \
} while (0)
/*
* When we use monitor registers, we count monitor events for the whole system,
* so the DIMAR0, DIMAR1, DDMAR0 and DDMAR1 registers do not depend on the
* process and need not be saved on process switch. The DIMCR and DDMCR
* registers do not depend on the process either, but they should be saved on
* process switch, because they are used to determine the moment monitoring
* starts while counting monitor events for a process.
*/
static inline void native_save_user_only_regs(struct sw_regs *sw_regs)
{
if (machine.save_dimtp)
@ -946,27 +939,46 @@ static inline void native_restore_user_only_regs(struct sw_regs *sw_regs)
}
}
#ifdef CONFIG_PERF_EVENTS
DECLARE_PER_CPU(u8, perf_monitors_used);
DECLARE_PER_CPU(u8, perf_bps_used);
# define is_perf_using_monitors __this_cpu_read(perf_monitors_used)
# define is_perf_using_bps __this_cpu_read(perf_bps_used)
#else /* ! CONFIG_PERF_EVENTS */
#define is_perf_using_monitors false
#define is_perf_using_bps false
#endif /* CONFIG_PERF_EVENTS */
static inline void native_clear_user_only_regs(void)
{
if (!is_perf_using_bps) {
u8 monitors_used = perf_read_monitors_used();
u8 bps_used = perf_read_bps_used();
if (!bps_used) {
NATIVE_WRITE_DIBCR_REG_VALUE(0);
NATIVE_WRITE_DDBCR_REG_VALUE(0);
}
if (!MONITORING_IS_ACTIVE && !is_perf_using_monitors) {
NATIVE_WRITE_DIMCR_REG_VALUE(0);
NATIVE_WRITE_DIBSR_REG_VALUE(0);
NATIVE_WRITE_DDMCR_REG_VALUE(0);
NATIVE_WRITE_DDBSR_REG_VALUE(0);
if (!MONITORING_IS_ACTIVE) {
if (!monitors_used) {
NATIVE_WRITE_DIMCR_REG_VALUE(0);
NATIVE_WRITE_DIBSR_REG_VALUE(0);
NATIVE_WRITE_DDMCR_REG_VALUE(0);
NATIVE_WRITE_DDBSR_REG_VALUE(0);
} else {
e2k_dimcr_t dimcr = NATIVE_READ_DIMCR_REG();
e2k_ddmcr_t ddmcr = NATIVE_READ_DDMCR_REG();
e2k_dibsr_t dibsr = NATIVE_READ_DIBSR_REG();
e2k_ddbsr_t ddbsr = NATIVE_READ_DDBSR_REG();
if (!(monitors_used & DIM0)) {
dimcr.half_word[0] = 0;
dibsr.m0 = 0;
}
if (!(monitors_used & DIM1)) {
dimcr.half_word[1] = 0;
dibsr.m1 = 0;
}
if (!(monitors_used & DDM0)) {
ddmcr.half_word[0] = 0;
ddbsr.m0 = 0;
}
if (!(monitors_used & DDM1)) {
ddmcr.half_word[1] = 0;
ddbsr.m1 = 0;
}
NATIVE_WRITE_DIMCR_REG(dimcr);
NATIVE_WRITE_DDMCR_REG(ddmcr);
NATIVE_WRITE_DIBSR_REG(dibsr);
NATIVE_WRITE_DDBSR_REG(ddbsr);
}
}
}
@ -1299,7 +1311,6 @@ native_set_current_thread_info(struct thread_info *thread,
set_osgd_task_struct(task);
}
static inline void
set_current_thread_info(struct thread_info *thread, struct task_struct *task)
{
@ -1308,5 +1319,21 @@ set_current_thread_info(struct thread_info *thread, struct task_struct *task)
set_osgd_task_struct(task);
}
/*
 * SAVE_PSYSCALL_RVAL - store six return-value related fields into @regs:
 * sys_rval, rval1, rval2, rv1_tag, rv2_tag and return_desk.
 *
 * The value parameters carry a leading underscore so that they do not
 * collide with the member names used on the left-hand side.
 */
#define SAVE_PSYSCALL_RVAL(regs, _rval, _rval1, _rval2, _rv1_tag, \
_rv2_tag, _return_desk) \
({ \
(regs)->sys_rval = (_rval); \
(regs)->rval1 = (_rval1); \
(regs)->rval2 = (_rval2); \
(regs)->rv1_tag = (_rv1_tag); \
(regs)->rv2_tag = (_rv2_tag); \
(regs)->return_desk = (_return_desk); \
})
/* SAVE_SYSCALL_RVAL - store @rval into the sys_rval field of @regs. */
#define SAVE_SYSCALL_RVAL(regs, rval) \
({ \
(regs)->sys_rval = (rval); \
})
#endif /* _E2K_REGS_STATE_H */

View File

@ -204,6 +204,18 @@
#define SIC_mc_ch 0x400
#define SIC_mc_status 0x44c
#define SIC_mc_opmb 0x424
#define SIC_mc0_opmb 0x414
#define SIC_mc1_opmb 0x454
#define SIC_mc2_opmb 0x494
#define SIC_mc3_opmb 0x4d4
#define SIC_mc_cfg 0x418
#define SIC_mc0_cfg 0x418
#define SIC_mc1_cfg 0x458
#define SIC_mc2_cfg 0x498
#define SIC_mc3_cfg 0x4d8
/* HMU */
#define SIC_hmu_mic 0xd00
#define SIC_hmu0_int 0xd40
@ -1085,6 +1097,79 @@ typedef union e2k_mc_ecc_struct { /* Structure word */
#define E2K_MC_ECC_secnt fields.secnt /* single error counter */
#define E2K_MC_ECC_reg word
/*
 * Read/Write MCX_OPMb (X={0, 1, 2, 3}) registers
 * ! only for P1 processor type !
 *
 * The register is one 32-bit word; the declared field widths below
 * add up to exactly 32 bits.
 */
typedef unsigned int e2k_mc_opmb_t; /* single word (32 bits) */
typedef struct e2k_mc_opmb_fields {
e2k_mc_opmb_t ct0 : 3; /* [0:2] */
e2k_mc_opmb_t ct1 : 3; /* [3:5] */
e2k_mc_opmb_t pbm0 : 2; /* [6:7] */
e2k_mc_opmb_t pbm1 : 2; /* [8:9] */
e2k_mc_opmb_t rm : 1; /* [10] */
e2k_mc_opmb_t rdodt : 1; /* [11] */
e2k_mc_opmb_t wrodt : 1; /* [12] */
e2k_mc_opmb_t bl8int : 1; /* [13] */
e2k_mc_opmb_t mi_fast : 1; /* [14] */
e2k_mc_opmb_t mt : 1; /* [15] */
e2k_mc_opmb_t il : 1; /* [16] */
e2k_mc_opmb_t rcven_del : 2; /* [17:18] */
e2k_mc_opmb_t mc_ps : 1; /* [19] */
e2k_mc_opmb_t arp_en : 1; /* [20] */
e2k_mc_opmb_t flt_brop : 1; /* [21] */
e2k_mc_opmb_t flt_rdpr : 1; /* [22] */
e2k_mc_opmb_t flt_blk : 1; /* [23] */
e2k_mc_opmb_t parerr : 1; /* [24] */
e2k_mc_opmb_t cmdpack : 1; /* [25] */
e2k_mc_opmb_t sldwr : 1; /* [26] */
e2k_mc_opmb_t sldrd : 1; /* [27] */
e2k_mc_opmb_t mirr : 1; /* [28] */
e2k_mc_opmb_t twrwr : 2; /* [29:30] */
e2k_mc_opmb_t mcln : 1; /* [31] */
} e2k_mc_opmb_fields_t;
/* The same register viewed either field by field or as one raw word */
typedef union e2k_mc_opmb_struct { /* Structure word */
e2k_mc_opmb_fields_t fields; /* as fields */
e2k_mc_opmb_t word; /* as entire register */
} e2k_mc_opmb_struct_t;
/* Member selectors to be applied to an e2k_mc_opmb_struct_t object */
#define E2K_MC_OPMB_pbm0 fields.pbm0 /* physical bank map slot 0 */
#define E2K_MC_OPMB_pbm1 fields.pbm1 /* physical bank map slot 1 */
#define E2K_MC_OPMB_reg word
/*
 * Read/Write MCX_CFG (X={0, 1, 2, 3}) registers
 * P9, E2C3, E12 and E16 processor type
 *
 * The register is one 32-bit word; the declared field widths below
 * add up to exactly 32 bits.
 */
typedef unsigned int e2k_mc_cfg_t; /* single word (32 bits) */
typedef struct e2k_mc_cfg_fields {
e2k_mc_cfg_t ct0 : 3; /* [0:2] */
e2k_mc_cfg_t ct1 : 3; /* [3:5] */
e2k_mc_cfg_t pbm0 : 2; /* [6:7] */
e2k_mc_cfg_t pbm1 : 2; /* [8:9] */
e2k_mc_cfg_t rm : 1; /* [10] */
e2k_mc_cfg_t reserve1 : 2; /* [11:12] */
e2k_mc_cfg_t mirr : 1; /* [13] */
e2k_mc_cfg_t sf : 3; /* [14:16] */
e2k_mc_cfg_t mt : 1; /* [17] */
e2k_mc_cfg_t cs8 : 1; /* [18] */
e2k_mc_cfg_t ptrr_mode : 2; /* [19:20] */
e2k_mc_cfg_t mcrc : 1; /* [21] */
e2k_mc_cfg_t odt_ext : 2; /* [22:23] */
e2k_mc_cfg_t pbswap : 1; /* [24] */
e2k_mc_cfg_t dqw : 2; /* [25:26] */
e2k_mc_cfg_t pda_sel : 5; /* [27:31] */
} e2k_mc_cfg_fields_t;
/* The same register viewed either field by field or as one raw word */
typedef union e2k_mc_cfg_struct { /* Structure word */
e2k_mc_cfg_fields_t fields; /* as fields */
e2k_mc_cfg_t word; /* as entire register */
} e2k_mc_cfg_struct_t;
/* Member selectors to be applied to an e2k_mc_cfg_struct_t object */
#define E2K_MC_CFG_pbm0 fields.pbm0 /* physical bank map slot 0 */
#define E2K_MC_CFG_pbm1 fields.pbm1 /* physical bank map slot 1 */
#define E2K_MC_CFG_reg word
/*
* Read/Write IPCC_CSRX (X={1, 2, 3}) registers
*/

View File

@ -77,6 +77,9 @@ boot_sic_write_node_nbsr_reg(int node_id, int reg_offset, unsigned int reg_val)
unsigned int sic_get_mc_ecc(int node, int num);
void sic_set_mc_ecc(int node, int num, unsigned int reg_value);
/*
 * Getters returning a 32-bit value for the MC OPMB and MC CFG registers.
 * Presumably @node selects the node and @num the controller index -
 * TODO confirm against the definitions.
 */
unsigned int sic_get_mc_opmb(int node, int num);
unsigned int sic_get_mc_cfg(int node, int num);
unsigned int sic_get_ipcc_csr(int node, int num);
void sic_set_ipcc_csr(int node, int num, unsigned int val);

View File

@ -135,7 +135,8 @@ do { \
/* Reserve 64 bytes for kernel per C calling convention */ \
u64 used_dstack_size = round_up(64, E2K_ALIGN_STACK); \
\
sbr = (u64)thread_info_task(ti)->stack + KERNEL_C_STACK_SIZE; \
sbr = (u64)thread_info_task(ti)->stack + KERNEL_C_STACK_SIZE + \
KERNEL_C_STACK_OFFSET; \
AW(usd_lo) = AW((ti)->k_usd_lo); \
AW(usd_hi) = AW((ti)->k_usd_hi); \
AS(usd_lo).base -= used_dstack_size; \

View File

@ -241,6 +241,7 @@ typedef struct thread_info {
/* hypercall */
/* End of flags only for virtualization support */
#define TIF_SYSCALL_TRACEPOINT 30 /* syscall tracepoint instrumentation */
#define TIF_NAPI_WORK 31 /* napi_wq_worker() is running */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@ -266,6 +267,7 @@ typedef struct thread_info {
#define _TIF_LIGHT_HYPERCALL (1 << TIF_LIGHT_HYPERCALL)
#define _TIF_GENERIC_HYPERCALL (1 << TIF_GENERIC_HYPERCALL)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
/*
 * NOTE(review): TIF_NAPI_WORK is 31, so this shifts 1 into the sign bit
 * of a signed int; the mask is negative and would sign-extend if it is
 * ever combined with a wider flags word - confirm how it is used.
 */
#define _TIF_NAPI_WORK (1 << TIF_NAPI_WORK)
#define _TIF_WORK_SYSCALL_TRACE (_TIF_SYSCALL_TRACE | \
_TIF_KERNEL_TRACE | \

View File

@ -104,4 +104,50 @@ get_MMU_phys_addr(e2k_addr_t virt_addr)
return __probe_entry(GET_MMU_PHYS_ADDR(virt_addr));
}
#endif
/* What was read from one TLB set for a given address: its tag and entry */
typedef struct tlb_set_state {
tlb_tag_t tlb_tag;	/* tag of the set */
pte_t tlb_entry;	/* entry of the set, kept as a pte value */
} tlb_set_state_t;
/* State of all TLB sets collected for one virtual address */
typedef struct tlb_line_state {
e2k_addr_t va;		/* address the sets were read for */
bool huge;		/* large-page flag the reads were made with */
tlb_set_state_t sets[NATIVE_TLB_SETS_NUM];	/* one record per set */
} tlb_line_state_t;
/*
 * Read the DTLB tag register for @addr in set @set_no.
 * @large_page is handed through unchanged to the register read.
 */
static inline tlb_tag_t
get_va_tlb_set_tag(e2k_addr_t addr, int set_no, bool large_page)
{
	tlb_tag_t tag = read_DTLB_va_tag_reg(addr, set_no, large_page);

	return tag;
}
/*
 * Read the DTLB entry register for @addr in set @set_no and return it
 * wrapped as a pte_t. @large_page is handed through to the register read.
 */
static inline pte_t
get_va_tlb_set_entry(e2k_addr_t addr, int set_no, bool large_page)
{
	pte_t entry;

	pte_val(entry) = read_DTLB_va_entry_reg(addr, set_no, large_page);

	return entry;
}
/*
 * Collect the state of every TLB set for @addr into @tlb.
 *
 * Records the address and the large-page flag, then for each set
 * 0 .. NATIVE_TLB_SETS_NUM - 1 reads the tag and the entry (in that
 * order) and stores both into the matching tlb->sets[] record.
 */
static inline void
get_va_tlb_state(tlb_line_state_t *tlb, e2k_addr_t addr, bool large_page)
{
	int set_no;

	tlb->va = addr;
	tlb->huge = large_page;

	for (set_no = 0; set_no < NATIVE_TLB_SETS_NUM; set_no++) {
		tlb_set_state_t *set_state = &tlb->sets[set_no];
		tlb_tag_t tlb_tag;
		pte_t tlb_entry;

		tlb_tag = get_va_tlb_set_tag(addr, set_no, large_page);
		tlb_entry = get_va_tlb_set_entry(addr, set_no, large_page);

		set_state->tlb_tag = tlb_tag;
		/* the entry has to be assigned; a bare member expression stores nothing */
		set_state->tlb_entry = tlb_entry;
	}
}
#endif /* !_E2K_TLB_REGS_ACCESS_H_ */

View File

@ -0,0 +1,115 @@
#ifndef _E2K_TRACE_DEFS_H_
#define _E2K_TRACE_DEFS_H_
#include <linux/types.h>
#include <asm/mmu_types.h>
#include <asm/pgtable_def.h>
/*
 * Walk the page table for @address and report what was found.
 *
 * Addresses below TASK_SIZE are looked up through @mm (pgd_offset());
 * all other addresses go through pgd_offset_k() and the kernel_*_huge()
 * checks. At every level reached the raw entry value is stored through
 * the matching output pointer and *pt_level is set to that level's
 * E2K_*_LEVEL_NUM. The walk stops at the first entry that is huge, none
 * or bad; outputs of the levels below it are left untouched.
 */
static inline void
trace_get_va_translation(struct mm_struct *mm, e2k_addr_t address,
	pgdval_t *pgd, pudval_t *pud, pmdval_t *pmd, pteval_t *pte, int *pt_level)
{
	pgd_t *pgd_ent;
	pud_t *pud_ent;
	pmd_t *pmd_ent;
	pte_t *pte_ent;
	bool descend;

	if (likely(address < TASK_SIZE)) {
		pgd_ent = pgd_offset(mm, address);
		*pgd = pgd_val(*pgd_ent);
		*pt_level = E2K_PGD_LEVEL_NUM;
		descend = !pgd_huge(*pgd_ent) && !pgd_none(*pgd_ent) &&
				!pgd_bad(*pgd_ent);
		if (!descend)
			return;

		pud_ent = pud_offset(pgd_ent, address);
		*pud = pud_val(*pud_ent);
		*pt_level = E2K_PUD_LEVEL_NUM;
		descend = !pud_huge(*pud_ent) && !pud_none(*pud_ent) &&
				!pud_bad(*pud_ent);
		if (!descend)
			return;

		pmd_ent = pmd_offset(pud_ent, address);
		*pmd = pmd_val(*pmd_ent);
		*pt_level = E2K_PMD_LEVEL_NUM;
		descend = !pmd_huge(*pmd_ent) && !pmd_none(*pmd_ent) &&
				!pmd_bad(*pmd_ent);
		if (!descend)
			return;

		pte_ent = pte_offset_map(pmd_ent, address);
		*pte = pte_val(*pte_ent);
		*pt_level = E2K_PTE_LEVEL_NUM;
		return;
	}

	/* address is not below TASK_SIZE: use the pgd_offset_k() lookup */
	pgd_ent = pgd_offset_k(address);
	*pgd = pgd_val(*pgd_ent);
	*pt_level = E2K_PGD_LEVEL_NUM;
	descend = !kernel_pgd_huge(*pgd_ent) && !pgd_none(*pgd_ent) &&
			!pgd_bad(*pgd_ent);
	if (!descend)
		return;

	pud_ent = pud_offset(pgd_ent, address);
	*pud = pud_val(*pud_ent);
	*pt_level = E2K_PUD_LEVEL_NUM;
	descend = !kernel_pud_huge(*pud_ent) && !pud_none(*pud_ent) &&
			!pud_bad(*pud_ent);
	if (!descend)
		return;

	pmd_ent = pmd_offset(pud_ent, address);
	*pmd = pmd_val(*pmd_ent);
	*pt_level = E2K_PMD_LEVEL_NUM;
	descend = !kernel_pmd_huge(*pmd_ent) && !pmd_none(*pmd_ent) &&
			!pmd_bad(*pmd_ent);
	if (!descend)
		return;

	pte_ent = pte_offset_kernel(pmd_ent, address);
	*pte = pte_val(*pte_ent);
	*pt_level = E2K_PTE_LEVEL_NUM;
}
/*
 * Save DTLB entries.
 *
 * The probe for @address itself is always made and stored in *dtlb_pgd.
 * The probes for the pud/pmd/pte virtual offsets of @address are made
 * only when @pt_level is not greater than the matching E2K_*_LEVEL_NUM;
 * the other outputs are left untouched. Entries that do not exist are
 * not accessed, to avoid creating "empty" records in the DTLB for no
 * reason. @mm is not used.
 */
static inline void
trace_get_dtlb_translation(struct mm_struct *mm, e2k_addr_t address,
	u64 *dtlb_pgd, u64 *dtlb_pud, u64 *dtlb_pmd, u64 *dtlb_pte, int pt_level)
{
	const bool want_pud = pt_level <= E2K_PUD_LEVEL_NUM;
	const bool want_pmd = pt_level <= E2K_PMD_LEVEL_NUM;
	const bool want_pte = pt_level <= E2K_PTE_LEVEL_NUM;

	*dtlb_pgd = get_MMU_DTLB_ENTRY(address);

	if (want_pud) {
		*dtlb_pud = get_MMU_DTLB_ENTRY(pud_virt_offset(address));
	}
	if (want_pmd) {
		*dtlb_pmd = get_MMU_DTLB_ENTRY(pmd_virt_offset(address));
	}
	if (want_pte) {
		*dtlb_pte = get_MMU_DTLB_ENTRY(pte_virt_offset(address));
	}
}
/*
 * Select the V6 or V2 page table flag printer for @entry depending on
 * @mmu_pt_v6; @print is passed through to the selected printer.
 *
 * NOTE(review): the expansion is not wrapped in parentheses, unlike
 * mmu_print_dtlb_entry(). The TP_printk() user supplies three "%s"
 * conversions per invocation, so the V2/V6 helpers apparently expand to
 * a comma-separated argument list and parenthesizing would collapse it
 * into one argument - confirm against the helper definitions before
 * changing this.
 */
#define mmu_print_pt_flags(entry, print, mmu_pt_v6) \
(mmu_pt_v6) ? E2K_TRACE_PRINT_PT_V6_FLAGS(entry, print) \
: \
E2K_TRACE_PRINT_PT_V2_FLAGS(entry, print)
/* Same, with the page table format taken from MMU_IS_PT_V6() */
#define print_pt_flags(entry, print) \
mmu_print_pt_flags(entry, print, MMU_IS_PT_V6())
/* Name used by the trace event definitions */
#define E2K_TRACE_PRINT_PT_FLAGS(entry, print) print_pt_flags(entry, print)
/*
 * Select the DTLB entry printer that matches the DTLB format:
 * the V6 printer when @mmu_dtlb_v6 is true, the V2 printer otherwise
 * (the same convention as mmu_print_pt_flags()).
 */
#define mmu_print_dtlb_entry(entry, mmu_dtlb_v6) \
	((mmu_dtlb_v6) ? E2K_TRACE_PRINT_DTLB_ENTRY_V6(entry) \
			: \
			E2K_TRACE_PRINT_DTLB_ENTRY_V2(entry))
/* Same, with the DTLB format taken from MMU_IS_DTLB_V6() */
#define print_dtlb_entry(entry) \
	mmu_print_dtlb_entry(entry, MMU_IS_DTLB_V6())
/* Name used by the trace event definitions */
#define E2K_TRACE_PRINT_DTLB(entry) print_dtlb_entry(entry)
#endif /* _E2K_TRACE_DEFS_H_ */

View File

@ -15,6 +15,7 @@
#include <asm/trace-mmu-dtlb-v2.h>
#include <asm/trace-mmu-dtlb-v6.h>
#include <asm/trap_def.h>
#include <asm/trace-defs.h>
#define E2K_TC_TYPE_STORE (1ULL << 17)
#define E2K_TC_TYPE_S_F (1ULL << 19)
@ -125,25 +126,6 @@ TRACE_EVENT(
))
);
#define mmu_print_pt_flags(entry, print, mmu_pt_v6) \
(mmu_pt_v6) ? E2K_TRACE_PRINT_PT_V6_FLAGS(entry, print) \
: \
E2K_TRACE_PRINT_PT_V2_FLAGS(entry, print)
#define print_pt_flags(entry, print) \
mmu_print_pt_flags(entry, print, MMU_IS_PT_V6())
#define E2K_TRACE_PRINT_PT_FLAGS(entry, print) print_pt_flags(entry, print)
#define mmu_print_dtlb_entry(entry, mmu_dtlb_v6) \
((mmu_dtlb_v6) ? E2K_TRACE_PRINT_DTLB_ENTRY_V2(entry) \
: \
E2K_TRACE_PRINT_DTLB_ENTRY_V6(entry))
#define print_dtlb_entry(entry) \
mmu_print_dtlb_entry(entry, MMU_IS_DTLB_V6())
#define E2K_TRACE_PRINT_DTLB(entry) print_dtlb_entry(entry)
TRACE_EVENT(
unhandled_page_fault,
@ -157,91 +139,19 @@ TRACE_EVENT(
__field( u64, dtlb_pud )
__field( u64, dtlb_pmd )
__field( u64, dtlb_pte )
__field( u64, pgd )
__field( u64, pud )
__field( u64, pmd )
__field( u64, pte )
__field( pgdval_t, pgd )
__field( pudval_t, pud )
__field( pmdval_t, pmd )
__field( pteval_t, pte )
__field( int, pt_level )
),
TP_fast_assign(
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
__entry->address = address;
/*
* Save page table entries
*/
__entry->pt_level = 0;
if (address < TASK_SIZE) {
struct mm_struct *mm = current->mm;
pgdp = pgd_offset(mm, address);
__entry->pgd = pgd_val(*pgdp);
__entry->pt_level = 1;
if (!pgd_huge(*pgdp) && !pgd_none(*pgdp) &&
!pgd_bad(*pgdp)) {
pudp = pud_offset(pgdp, address);
__entry->pud = pud_val(*pudp);
__entry->pt_level = 2;
if (!pud_huge(*pudp) && !pud_none(*pudp) &&
!pud_bad(*pudp)) {
pmdp = pmd_offset(pudp, address);
__entry->pmd = pmd_val(*pmdp);
__entry->pt_level = 3;
if (!pmd_huge(*pmdp) &&
!pmd_none(*pmdp) &&
!pmd_bad(*pmdp)) {
ptep = pte_offset_map(pmdp,
address);
__entry->pte = pte_val(*ptep);
__entry->pt_level = 4;
}
}
}
} else {
pgdp = pgd_offset_k(address);
__entry->pgd = pgd_val(*pgdp);
__entry->pt_level = 1;
if (!kernel_pgd_huge(*pgdp) &&
!pgd_none(*pgdp) && !pgd_bad(*pgdp)) {
pudp = pud_offset(pgdp, address);
__entry->pud = pud_val(*pudp);
__entry->pt_level = 2;
if (!kernel_pud_huge(*pudp) &&
!pud_none(*pudp) && !pud_bad(*pudp)) {
pmdp = pmd_offset(pudp, address);
__entry->pmd = pmd_val(*pmdp);
__entry->pt_level = 3;
if (!kernel_pmd_huge(*pmdp) &&
!pmd_none(*pmdp) &&
!pmd_bad(*pmdp)) {
ptep = pte_offset_kernel(pmdp,
address);
__entry->pte = pte_val(*ptep);
__entry->pt_level = 4;
}
}
}
}
trace_get_va_translation(current->mm, address,
&__entry->pgd, &__entry->pud, &__entry->pmd,
&__entry->pte, &__entry->pt_level);
/*
* Save DTLB entries.
@ -249,30 +159,21 @@ TRACE_EVENT(
* Do not access not existing entries to avoid
* creating "empty" records in DTLB for no reason.
*/
__entry->dtlb_entry = get_MMU_DTLB_ENTRY(address);
if (__entry->pt_level >= 2)
__entry->dtlb_pud = get_MMU_DTLB_ENTRY(
pud_virt_offset(address));
if (__entry->pt_level >= 3)
__entry->dtlb_pmd = get_MMU_DTLB_ENTRY(
pmd_virt_offset(address));
if (__entry->pt_level >= 4)
__entry->dtlb_pte = get_MMU_DTLB_ENTRY(
pte_virt_offset(address));
trace_get_dtlb_translation(current->mm, address,
&__entry->dtlb_entry, &__entry->dtlb_pud,
&__entry->dtlb_pmd, &__entry->dtlb_pte,
__entry->pt_level);
),
TP_printk("\n"
"Page table for address 0x%lx (all f's are returned if the entry has not been read)\n"
" pgd 0x%llx: %s\n"
" pgd 0x%lx: %s\n"
" Access mode: %s%s\n"
" pud 0x%llx: %s\n"
" pud 0x%lx: %s\n"
" Access mode: %s%s\n"
" pmd 0x%llx: %s\n"
" pmd 0x%lx: %s\n"
" Access mode: %s%s\n"
" pte 0x%llx: %s\n"
" pte 0x%lx: %s\n"
" Access mode: %s%s\n"
"Probed DTLB entries:\n"
" pud address entry 0x%llx: %s\n"
@ -281,22 +182,26 @@ TRACE_EVENT(
" address entry 0x%llx: %s"
,
__entry->address,
(__entry->pt_level >= 1) ? __entry->pgd : -1ULL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pgd, __entry->pt_level >= 1),
(__entry->pt_level >= 2) ? __entry->pud : -1ULL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pud, __entry->pt_level >= 2),
(__entry->pt_level >= 3) ? __entry->pmd : -1ULL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pmd, __entry->pt_level >= 3),
(__entry->pt_level >= 4) ? __entry->pte : -1ULL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pte, __entry->pt_level >= 4),
(__entry->pt_level >= 2) ? __entry->dtlb_pud : -1ULL,
(__entry->pt_level >= 2) ?
(__entry->pt_level <= E2K_PGD_LEVEL_NUM) ? __entry->pgd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pgd,
__entry->pt_level <= E2K_PGD_LEVEL_NUM),
(__entry->pt_level <= E2K_PUD_LEVEL_NUM) ? __entry->pud : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pud,
__entry->pt_level <= E2K_PUD_LEVEL_NUM),
(__entry->pt_level <= E2K_PMD_LEVEL_NUM) ? __entry->pmd : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pmd,
__entry->pt_level <= E2K_PMD_LEVEL_NUM),
(__entry->pt_level <= E2K_PTE_LEVEL_NUM) ? __entry->pte : -1UL,
E2K_TRACE_PRINT_PT_FLAGS(__entry->pte,
__entry->pt_level <= E2K_PTE_LEVEL_NUM),
(__entry->pt_level <= E2K_PUD_LEVEL_NUM) ? __entry->dtlb_pud : -1ULL,
(__entry->pt_level <= E2K_PUD_LEVEL_NUM) ?
E2K_TRACE_PRINT_DTLB(__entry->dtlb_pud) : "(not read)",
(__entry->pt_level >= 3) ? __entry->dtlb_pmd : -1ULL,
(__entry->pt_level >= 3) ?
(__entry->pt_level <= E2K_PMD_LEVEL_NUM) ? __entry->dtlb_pmd : -1ULL,
(__entry->pt_level <= E2K_PMD_LEVEL_NUM) ?
E2K_TRACE_PRINT_DTLB(__entry->dtlb_pmd) : "(not read)",
(__entry->pt_level >= 4) ? __entry->dtlb_pte : -1ULL,
(__entry->pt_level >= 4) ?
(__entry->pt_level <= E2K_PTE_LEVEL_NUM) ? __entry->dtlb_pte : -1ULL,
(__entry->pt_level <= E2K_PTE_LEVEL_NUM) ?
E2K_TRACE_PRINT_DTLB(__entry->dtlb_pte) : "(not read)",
__entry->dtlb_entry,
E2K_TRACE_PRINT_DTLB(__entry->dtlb_entry))

View File

@ -1,6 +1,3 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM e2k
#if !defined(_TRACE_E2K_PGTABLE_V2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_E2K_PGTABLE_V2_H

View File

@ -1,6 +1,3 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM e2k
#if !defined(_TRACE_E2K_PGTABLE_V6_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_E2K_PGTABLE_V6_H

View File

@ -146,9 +146,10 @@
.macro HANDLER_TRAMPOLINE ctprN, scallN, fn, wbsL
/* Force load OSGD->GD. Alternative is to use non-0 CUI for kernel */
{
nop
sdisp \ctprN, \scallN
}
/* CPU_HWBUG_VIRT_PSIZE_INTERCEPTION */
{ nop } { nop } { nop } { nop }
call \ctprN, wbs=\wbsL
disp \ctprN, \fn
SWITCH_HW_STACKS_FROM_USER()

View File

@ -249,7 +249,8 @@ is_kernel_data_stack_bounds(bool on_kernel, e2k_usd_lo_t usd_lo)
#include <asm/kvm/trap_table.h>
#ifndef __ASSEMBLY__
static inline void init_pt_regs_for_syscall(struct pt_regs *regs)
__always_inline /* For CPU_HWBUG_VIRT_PSIZE_INTERCEPTION */
static void init_pt_regs_for_syscall(struct pt_regs *regs)
{
regs->next = NULL;
regs->trap = NULL;

View File

@ -31,6 +31,7 @@
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC

View File

@ -222,25 +222,10 @@
#define __NR_stat64 195
#define __NR_lstat64 196
#define __NR_fstat64 197
#define __NR_lchown32 198
#define __NR_getuid32 199
#define __NR_getgid32 200
#define __NR_geteuid32 201
#define __NR_getegid32 202
#define __NR_setreuid32 203
#define __NR_setregid32 204
#define __NR_pidfd_send_signal 205
#define __NR_pidfd_open 206
#define __NR_fchown32 207
#define __NR_setresuid32 208
#define __NR_getresuid32 209
#define __NR_setresgid32 210
#define __NR_getresgid32 211
#define __NR_chown32 212
#define __NR_setuid32 213
#define __NR_setgid32 214
#define __NR_setfsuid32 215
#define __NR_setfsgid32 216
#define __NR_pivot_root 217
#define __NR_mincore 218
#define __NR_madvise 219

View File

@ -701,6 +701,9 @@ struct sk_buff {
};
struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
struct list_head list;
#ifdef CONFIG_MCST
struct list_head napi_skb_list;
#endif
};
union {